(retrofit from trunk) N° 615: spreadsheet export enhancement to remove unneeded line breaks.

SVN:2.3[4550]
This commit is contained in:
Denis Flaven
2017-01-31 13:32:16 +00:00
parent d898cffd4e
commit f7f4fbce51

View File

@@ -31,6 +31,7 @@ class SpreadsheetBulkExport extends TabularBulkExport
$oP->p(" *\tfields: (mandatory) the comma separated list of field codes to export (e.g: name,org_id,service_name...)."); $oP->p(" *\tfields: (mandatory) the comma separated list of field codes to export (e.g: name,org_id,service_name...).");
$oP->p(" *\tno_localize: (optional) pass 1 to retrieve the raw (untranslated) values for enumerated fields. Default: 0."); $oP->p(" *\tno_localize: (optional) pass 1 to retrieve the raw (untranslated) values for enumerated fields. Default: 0.");
$oP->p(" *\tdate_format: the format to use when exporting date and time fields (default = the SQL format). e.g. 'Y-m-d H:i:s'"); $oP->p(" *\tdate_format: the format to use when exporting date and time fields (default = the SQL format). e.g. 'Y-m-d H:i:s'");
$oP->p(" *\tformatted_text: set to 1 to formatted text fields with their HTML markup, 0 to remove formatting. Default is 1 (= formatted text)");
} }
public function EnumFormParts() public function EnumFormParts()
@@ -51,7 +52,14 @@ class SpreadsheetBulkExport extends TabularBulkExport
$oP->add('<fieldset><legend>'.Dict::S('Core:BulkExport:SpreadsheetOptions').'</legend>'); $oP->add('<fieldset><legend>'.Dict::S('Core:BulkExport:SpreadsheetOptions').'</legend>');
$oP->add('<table>'); $oP->add('<table>');
$oP->add('<tr>'); $oP->add('<tr>');
$oP->add('<td><input type="checkbox" id="spreadsheet_no_localize" name="no_localize" value="1"'.$sChecked.'><label for="spreadsheet_no_localize"> '.Dict::S('Core:BulkExport:OptionNoLocalize').'</label></td>');
$oP->add('<td style="vertical-align:top">');
$sChecked = (utils::ReadParam('formatted_text', 1) == 1) ? ' checked ' : '';
$oP->add('<h3>'.Dict::S('Core:BulkExport:TextFormat').'</h3>');
$oP->add('<input type="hidden" name="formatted_text" value="0">'); // Trick to pass the zero value if the checkbox below is unchecked, since we want the default value to be "1"
$oP->add('<input type="checkbox" id="spreadsheet_formatted_text" name="formatted_text" value="1"'.$sChecked.'><label for="spreadsheet_formatted_text"> '.Dict::S('Core:BulkExport:OptionFormattedText').'</label><br/><br/>');
$oP->add('<input type="checkbox" id="spreadsheet_no_localize" name="no_localize" value="1"'.$sChecked.'><label for="spreadsheet_no_localize"> '.Dict::S('Core:BulkExport:OptionNoLocalize').'</label>');
$oP->add('</td>');
$sDateTimeFormat = utils::ReadParam('date_format', (string)AttributeDateTime::GetFormat(), true, 'raw_data'); $sDateTimeFormat = utils::ReadParam('date_format', (string)AttributeDateTime::GetFormat(), true, 'raw_data');
$sDefaultChecked = ($sDateTimeFormat == (string)AttributeDateTime::GetFormat()) ? ' checked' : ''; $sDefaultChecked = ($sDateTimeFormat == (string)AttributeDateTime::GetFormat()) ? ' checked' : '';
@@ -90,6 +98,7 @@ EOF
public function ReadParameters() public function ReadParameters()
{ {
parent::ReadParameters(); parent::ReadParameters();
$this->aStatusInfo['formatted_text'] = (bool)utils::ReadParam('formatted_text', 1, true);
$sDateFormatRadio = utils::ReadParam('spreadsheet_date_format_radio', ''); $sDateFormatRadio = utils::ReadParam('spreadsheet_date_format_radio', '');
switch($sDateFormatRadio) switch($sDateFormatRadio)
@@ -126,6 +135,7 @@ EOF
protected function GetValue($oObj, $sAttCode) protected function GetValue($oObj, $sAttCode)
{ {
$bFormattedText = (array_key_exists('formatted_text', $this->aStatusInfo) ? $this->aStatusInfo['formatted_text'] : false);
switch($sAttCode) switch($sAttCode)
{ {
case 'id': case 'id':
@@ -147,6 +157,18 @@ EOF
{ {
$sRet = ''; $sRet = '';
} }
elseif ($oAttDef instanceof AttributeText)
{
if ($bFormattedText)
{
// Replace paragraphs (<p...>...</p>, etc) by line breaks (<br/>) since Excel (pre-2016) splits the cells when there is a paragraph
$sRet = static::HtmlToSpreadsheet($oObj->GetAsHTML($sAttCode));
}
else
{
$sRet = utils::HtmlToText($oObj->GetAsHTML($sAttCode));
}
}
elseif ($oAttDef instanceof AttributeString) elseif ($oAttDef instanceof AttributeString)
{ {
$sRet = $oObj->GetAsHTML($sAttCode); $sRet = $oObj->GetAsHTML($sAttCode);
@@ -232,6 +254,7 @@ EOF
$this->OptimizeColumnLoad($oSet); $this->OptimizeColumnLoad($oSet);
$sExportDateTimeFormat = $this->aStatusInfo['date_format']; $sExportDateTimeFormat = $this->aStatusInfo['date_format'];
$bFormattedText = (array_key_exists('formatted_text', $this->aStatusInfo) ? $this->aStatusInfo['formatted_text'] : false);
// Date & time formats // Date & time formats
$oDateTimeFormat = new DateTimeFormat($sExportDateTimeFormat); $oDateTimeFormat = new DateTimeFormat($sExportDateTimeFormat);
$oDateFormat = new DateTimeFormat($oDateTimeFormat->ToDateFormat()); $oDateFormat = new DateTimeFormat($oDateTimeFormat->ToDateFormat());
@@ -289,6 +312,20 @@ EOF
// Trick for Excel: treat the content as text even if it begins with an equal sign // Trick for Excel: treat the content as text even if it begins with an equal sign
$sData .= "<td x:str>$sField</td>"; $sData .= "<td x:str>$sField</td>";
} }
elseif ($oAttDef instanceof AttributeText)
{
if ($bFormattedText)
{
// Replace paragraphs (<p...>...</p>, etc) by line breaks (<br/>) since Excel (pre-2016) splits the cells when there is a paragraph
$sField = static::HtmlToSpreadsheet($oObj->GetAsHTML($sAttCode));
}
else
{
// Convert to plain text
$sField = utils::HtmlToText($oObj->GetAsHTML($sAttCode));
}
$sData .= "<td x:str>$sField</td>";
}
else if($oAttDef instanceof AttributeString) else if($oAttDef instanceof AttributeString)
{ {
$sField = $oObj->GetAsHTML($sAttCode, $this->bLocalizeOutput); $sField = $oObj->GetAsHTML($sAttCode, $this->bLocalizeOutput);
@@ -354,4 +391,51 @@ EOF
{ {
return 'html'; return 'html';
} }
/**
 * Cleanup all markup displayed as line breaks (except <br> tags) since this
 * causes Excel (pre-2016) to generate extra lines in the table, thus breaking
 * the tabular disposition of the export.
 * Note: Excel 2016 also refuses line breaks, so the only solution for this case is alas plain text.
 *
 * Processing steps:
 *  - a value without any text (once the tags are stripped) is rendered as a non-breaking space,
 *  - the englobing <div class="HTML" >...</div> is removed,
 *  - block-level container tags are turned into <span ...>...</span><br/> (attributes are kept),
 *  - <hr> (which has no closing tag) is turned into a single <br/>,
 *  - table / list structural tags are removed (their content is kept),
 *  - one trailing <br/> is removed.
 *
 * @param string $sHtml The HTML to cleanup
 * @return string The cleaned HTML
 */
public static function HtmlToSpreadsheet($sHtml)
{
	if (trim(strip_tags($sHtml)) === '')
	{
		// Display this value as an empty cell in the table
		return '&nbsp;';
	}

	// The tags listed here are a subset of the whitelist defined in HTMLDOMSanitizer
	// Tags causing a visual "line break" in the displayed page (i.e. display: block) are to be replaced by a <span> followed by a <br/>
	// in order to preserve any inline style/attribute of the removed tag
	$aTagsToReplace = array(
		'pre', 'div', 'p', 'center', 'h1', 'h2', 'h3', 'h4', 'li', 'fieldset', 'legend', 'nav', 'section', 'tr', 'caption',
	);
	// Void tags (no content, no closing tag) causing a line break: replaced by a plain <br/>.
	// Turning them into an opening <span> would leave that <span> unclosed.
	$aVoidTagsToReplace = array(
		'hr',
	);
	// Tags to completely remove from the markup
	$aTagsToRemove = array(
		'table', 'thead', 'tbody', 'ul', 'ol', 'td', 'th',
	);

	// preg_replace() returns null on a PCRE error (e.g. backtrack/JIT stack limit on a huge value):
	// in such a case keep the text unchanged instead of silently emptying the cell
	$fnReplace = function ($sPattern, $sReplacement, $sSubject)
	{
		$sResult = preg_replace($sPattern, $sReplacement, $sSubject);
		return ($sResult === null) ? $sSubject : $sResult;
	};

	// Remove the englobing <div class="HTML" >...</div> to prevent an extra line break
	$sHtml = $fnReplace('|^<div class="HTML" >(.*)</div>$|s', '$1', $sHtml); // Must use the "s" (. matches newline) modifier

	// The lookahead (?=[\s/>]) ensures that the whole tag name is matched:
	// without it "p" would also match <progress>, "li" would match <link>, "tr" would match <track>...
	foreach($aTagsToReplace as $sTag)
	{
		$sHtml = $fnReplace('|<'.$sTag.'(?=[\s/>]) ?([^>]*)>|is', '<span $1>', $sHtml);
		$sHtml = $fnReplace('|</'.$sTag.'\s*>|i', '</span><br/>', $sHtml);
	}
	foreach($aVoidTagsToReplace as $sTag)
	{
		$sHtml = $fnReplace('|<'.$sTag.'(?=[\s/>])[^>]*>|is', '<br/>', $sHtml);
	}
	foreach($aTagsToRemove as $sTag)
	{
		$sHtml = $fnReplace('|<'.$sTag.'(?=[\s/>])[^>]*>|is', '', $sHtml);
		$sHtml = $fnReplace('|</'.$sTag.'\s*>|i', '', $sHtml);
	}

	// Remove any trailing <br/>, if any, to prevent an extra line break
	$sHtml = $fnReplace('|<br/>$|', '', $sHtml);

	return $sHtml;
}
} }