Bugzilla – Attachment 43719 Details for
Bug 34319
Japanese text is incorrectly exported to Word document.
Home
|
New
|
Browse
|
Search
|
[?]
|
Reports
|
Help
|
New Account
|
Log In
[x]
|
Forgot Password
Login:
[x]
[patch]
rough and ready hack at the other part
rough.ready.patch (text/plain), 14.71 KB, created by
Caolán McNamara
on 2011-02-23 08:57:36 UTC
(
hide
)
Description:
rough and ready hack at the other part
Filename:
MIME Type:
Creator:
Caolán McNamara
Created:
2011-02-23 08:57:36 UTC
Size:
14.71 KB
patch
obsolete
>diff --git a/sw/source/filter/ww8/ww8par.cxx b/sw/source/filter/ww8/ww8par.cxx
>index 94e2564..9c59ce4 100644
>--- a/sw/source/filter/ww8/ww8par.cxx
>+++ b/sw/source/filter/ww8/ww8par.cxx
>@@ -56,6 +56,7 @@
> #include <editeng/langitem.hxx>
> #include <editeng/opaqitem.hxx>
> #include <editeng/charhiddenitem.hxx>
>+#include <editeng/fontitem.hxx>
> #include <filter/msfilter/svxmsbas.hxx>
> #include <svx/unoapi.hxx>
> #include <svx/svdoole2.hxx>
>@@ -104,6 +105,7 @@
> #include <fchrfmt.hxx>
> #include <charfmt.hxx>
> 
>+#include <breakit.hxx>
> 
> #include <com/sun/star/i18n/ForbiddenCharacters.hpp>
> #include <comphelper/extract.hxx>
>@@ -154,6 +156,8 @@ using namespace nsHdFtFlags;
> 
> #include <com/sun/star/document/XEventsSupplier.hpp>
> #include <com/sun/star/container/XNameReplace.hpp>
>+#include <com/sun/star/i18n/XBreakIterator.hpp>
>+#include <com/sun/star/i18n/ScriptType.hdl>
> #include <com/sun/star/frame/XModel.hpp>
> #include <filter/msfilter/msvbahelper.hxx>
> #include <unotools/pathoptions.hxx>
>@@ -2622,7 +2626,277 @@ bool SwWW8ImplReader::ReadPlainChars(WW8_CP& rPos, long nEnd, long nCpOfs)
>     return nL2 >= nLen;
> }
> 
>-//TODO: In writer we categorize text into CJK, CTL and "Western" for everything
>+/*
>+ Character type   | Font (ftc)                     | Language (lid)
>+ ASCII            | sprmCRgftc0                    | sprmCRglid0
>+ non-East Asian   | sprmCRgftc2                    | sprmCRglid0
>+ East Asian       | sprmCRgftc1                    | sprmCRglid1
>+ shared character | sprmCRgftc2 if chp.idctHint==0 | sprmCRglid0 if chp.idctHint==0
>+                  | sprmCRgftc1 if chp.idctHint==1 | sprmCRglid1 if chp.idctHint==1
>+*/
>+
>+//Word's character categories. Each takes the value of an i18n::ScriptType
>+//constant so that a category can be compared directly with the script type
>+//that the break iterator reports for the same text.
>+enum MSScriptType
>+{
>+    ASCII = i18n::ScriptType::LATIN,
>+    EastAsian = i18n::ScriptType::ASIAN,
>+    NonEastAsian = i18n::ScriptType::COMPLEX,
>+    Shared = i18n::ScriptType::WEAK
>+};
>+
>+//Return the Word category of the Unicode subrange (usr*) that cChar falls
>+//in; within the Latin and IPA ranges individual characters are singled out
>+//as Shared. Characters outside every listed range are non-East Asian.
>+MSScriptType categorizeCharByMSScript(sal_Unicode cChar)
>+{
>+    if (cChar >= 0x20 && cChar <= 0x7f) //usrBasicLatin
>+        return MSScriptType::ASCII;
>+    if (cChar >= 0xa0 && cChar <= 0xff) //usrLatin1
>+    {
>+        switch (cChar)
>+        {
>+            case 0xa1:
>+            case 0xa4:
>+            case 0xa7:
>+            case 0xa8:
>+            case 0xaa:
>+            case 0xad:
>+            case 0xaf:
>+            case 0xb0:
>+            case 0xb1:
>+            case 0xb2:
>+            case 0xb3:
>+            case 0xb4:
>+            case 0xb6:
>+            case 0xb7:
>+            case 0xb8:
>+            case 0xb9:
>+            case 0xba:
>+            case 0xbc:
>+            case 0xbd:
>+            case 0xbe:
>+            case 0xbf:
>+            case 0xd7:
>+            case 0xf7:
>+                return MSScriptType::Shared;
>+            default:
>+                return MSScriptType::NonEastAsian;
>+        }
>+    }
>+    if (cChar >= 0x100 && cChar <= 0x17f) //usrLatinXA
>+    {
>+        switch (cChar)
>+        {
>+            case 0x100:
>+            case 0x101:
>+            case 0x113:
>+            case 0x11b:
>+            case 0x12b:
>+            case 0x144:
>+            case 0x148:
>+            case 0x14d:
>+            case 0x16b:
>+                return MSScriptType::Shared;
>+            default:
>+                return MSScriptType::NonEastAsian;
>+        }
>+
>+    }
>+    if (cChar >= 0x180 && cChar <= 0x24f) //usrLatinXB
>+    {
>+        switch (cChar)
>+        {
>+            case 0x192:
>+            case 0x1fa:
>+            case 0x1fb:
>+            case 0x1fc:
>+            case 0x1fd:
>+            case 0x1fe:
>+                return MSScriptType::Shared;
>+            default:
>+                return MSScriptType::NonEastAsian;
>+        }
>+
>+    }
>+    if (cChar >= 0x250 && cChar <= 0x2af) //usrIPAExtensions
>+    {
>+        switch (cChar)
>+        {
>+            case 0x251:
>+            case 0x261:
>+                return MSScriptType::Shared;
>+            default:
>+                return MSScriptType::NonEastAsian;
>+        }
>+    }
>+    if (cChar >= 0x2b0 && cChar <= 0x2ff) //usrSpacingModLetters
>+        return MSScriptType::Shared;
>+    if (cChar >= 0x300 && cChar <= 0x36f) //usrCombDiacritical
>+        return MSScriptType::Shared;
>+    if (cChar >= 0x370 && cChar <= 0x3cf) //usrBasicGreek
>+        return MSScriptType::Shared;
>+    if (cChar >= 0x400 && cChar <= 0x4ff) //usrCyrillic
>+        return MSScriptType::Shared;
>+    if (cChar >= 0x1e00 && cChar <= 0x1eff) //usrLatinExtendedAdd
>+        return MSScriptType::Shared;
>+    if (cChar >= 0x2000 && cChar <= 0x2065) //usrGeneralPunct
>+        return MSScriptType::Shared;
>+    if (cChar >= 0x2070 && cChar <= 0x209f) //usrSuperAndSubscript
>+        return MSScriptType::Shared;
>+    if (cChar >= 0x20a0 && cChar <= 0x20cf) //usrCurrencySymbols
>+        return MSScriptType::Shared;
>+    if (cChar >= 0x20d0 && cChar <= 0x20ff) //usrCombDiacriticsS
>+        return MSScriptType::Shared;
>+    if (cChar >= 0x2100 && cChar <= 0x214f) //usrLetterlikeSymbols
>+        return MSScriptType::Shared;
>+    if (cChar >= 0x2150 && cChar <= 0x218f) //usrNumberForms
>+        return MSScriptType::Shared;
>+    if (cChar >= 0x2190 && cChar <= 0x21ff) //usrArrows
>+        return MSScriptType::Shared;
>+    if (cChar >= 0x2200 && cChar <= 0x22ff) //usrMathematicalOps
>+        return MSScriptType::Shared;
>+    if (cChar >= 0x2300 && cChar <= 0x23ff) //usrMiscTechnical
>+        return MSScriptType::Shared;
>+    if (cChar >= 0x2400 && cChar <= 0x243f) //usrControlPictures
>+        return MSScriptType::Shared;
>+    if (cChar >= 0x2440 && cChar <= 0x245f) //usrOpticalCharRecog
>+        return MSScriptType::Shared;
>+    if (cChar >= 0x2460 && cChar <= 0x24ff) //usrEnclosedAlphanum
>+        return MSScriptType::Shared;
>+    if (cChar >= 0x2500 && cChar <= 0x257f) //usrBoxDrawing
>+        return MSScriptType::Shared;
>+    if (cChar >= 0x2580 && cChar <= 0x259f) //usrBlockElements
>+        return MSScriptType::Shared;
>+    if (cChar >= 0x25a0 && cChar <= 0x25ff) //usrGeometricShapes
>+        return MSScriptType::Shared;
>+    if (cChar >= 0x2600 && cChar <= 0x26ff) //usrMiscDingbats
>+        return MSScriptType::Shared;
>+    if (cChar >= 0x2700 && cChar <= 0x27bf) //usrDingbats
>+        return MSScriptType::Shared;
>+    if (cChar >= 0x3000 && cChar <= 0x303f) //usrCJKSymAndPunct
>+        return MSScriptType::EastAsian;
>+    if (cChar >= 0x3040 && cChar <= 0x309f) //usrHiragana
>+        return MSScriptType::EastAsian;
>+    if (cChar >= 0x30a0 && cChar <= 0x30ff) //usrKatakana
>+        return MSScriptType::EastAsian;
>+    if (cChar >= 0x3100 && cChar <= 0x312f) //usrBopomofo
>+        return MSScriptType::EastAsian;
>+    if (cChar >= 0x3130 && cChar <= 0x318f) //usrHangulCompatJamo
>+        return MSScriptType::EastAsian;
>+    if (cChar >= 0x3190 && cChar <= 0x319f) //usrCJKMisc
>+        return MSScriptType::EastAsian;
>+    if (cChar >= 0x3200 && cChar <= 0x32ff) //usrEnclosedCJKLtMnth
>+        return MSScriptType::EastAsian;
>+    if (cChar >= 0x3300 && cChar <= 0x33ff) //usrCJKCompatibility
>+        return MSScriptType::EastAsian;
>+    if (cChar >= 0x4a00 && cChar <= 0x4dff) //usrCJKCompatibility
>+        return MSScriptType::EastAsian;
>+    if (cChar >= 0x4e00 && cChar <= 0x9fff) //usrCJKUnifiedIdeo
>+        return MSScriptType::EastAsian;
>+    if (cChar >= 0xac00 && cChar <= 0xd7a3) //usrHangul
>+        return MSScriptType::EastAsian;
>+    if (cChar >= 0xe000 && cChar <= 0xf8ff) //usrPrivateUseArea
>+        return MSScriptType::Shared;
>+    if (cChar >= 0xf900 && cChar <= 0xfaff) //usrCJKCompatibilityIdeographs
>+        return MSScriptType::EastAsian;
>+    if (cChar >= 0xfb00 && cChar <= 0xfb4f) //usrAlphaPresentationForms
>+        return MSScriptType::Shared;
>+    if (cChar >= 0xfb50 && cChar <= 0xfdff) //usrArabicPresentationFormsA
>+        return MSScriptType::Shared;
>+    if (cChar >= 0xfe20 && cChar <= 0xfe2f) //usrCombiningHalfMarks
>+        return MSScriptType::EastAsian;
>+    if (cChar >= 0xfe30 && cChar <= 0xfe4f) //usrCJKCompatForms
>+        return MSScriptType::EastAsian;
>+    if (cChar >= 0xfe50 && cChar <= 0xfe6f) //usrSmallFormVariants
>+        return MSScriptType::EastAsian;
>+    if (cChar >= 0xfe70 && cChar <= 0xfefe) //usrArabicPresentationFormsB
>+        return MSScriptType::Shared;
>+    if (cChar >= 0xff00 && cChar <= 0xffef) //usrHFWidthForms
>+        return MSScriptType::EastAsian;
>+    return MSScriptType::NonEastAsian;
>+}
>+
>+//Add one run of text whose characters all share a single Writer script type
>+//and a single Word category. When the two differ, the font stored under the
>+//Word category's font id is copied to the font id Writer will use for the
>+//run: it is pushed with pCtrlStck->NewAttr before the text is added,
>+//followed by a matching pCtrlStck->SetAttr afterwards.
>+//  rAddString         - the text to add
>+//  nLibreOfficeScript - i18n::ScriptType of the run as reported for Writer
>+//  nMSOfficeScript    - MSScriptType of the run; Shared is resolved through
>+//                       nIdctHint (0: non-East Asian, 1: East Asian)
>+void SwWW8ImplReader::emulateMSWordAddChunkToParagraph(const rtl::OUString& rAddString,
>+    sal_uInt16 nLibreOfficeScript, sal_uInt16 nMSOfficeScript)
>+{
>+    if (nMSOfficeScript == MSScriptType::Shared)
>+    {
>+        if (nIdctHint == 0)
>+            nMSOfficeScript = MSScriptType::NonEastAsian;
>+        else if (nIdctHint == 1)
>+            nMSOfficeScript = MSScriptType::EastAsian;
>+    }
>+    bool bForceProperties = (nMSOfficeScript != nLibreOfficeScript);
>+    sal_Int16 nLibreOfficeId;
>+    if (bForceProperties)
>+    {
>+        //This is the ID that LibreOffice will use for the text
>+        switch (nLibreOfficeScript)
>+        {
>+            case i18n::ScriptType::LATIN:
>+            default:
>+                nLibreOfficeId = RES_CHRATR_FONT;
>+                break;
>+            case i18n::ScriptType::ASIAN:
>+                nLibreOfficeId = RES_CHRATR_CJK_FONT;
>+                break;
>+            case i18n::ScriptType::COMPLEX:
>+                nLibreOfficeId = RES_CHRATR_CTL_FONT;
>+                break;
>+        }
>+        //This is the ID that contains the properties that MSWord
>+        //would use
>+        sal_Int16 nForceLibreOfficeId;
>+        switch (nMSOfficeScript)
>+        {
>+            case MSScriptType::ASCII:
>+            default:
>+                nForceLibreOfficeId = RES_CHRATR_FONT;
>+                break;
>+            case MSScriptType::EastAsian:
>+                nForceLibreOfficeId = RES_CHRATR_CJK_FONT;
>+                break;
>+            case MSScriptType::NonEastAsian:
>+                nForceLibreOfficeId = RES_CHRATR_CTL_FONT;
>+                break;
>+        }
>+
>+        const SvxFontItem *pSourceFont = (const SvxFontItem*)GetFmtAttr(nForceLibreOfficeId);
>+        const SvxFontItem *pDestFont = (const SvxFontItem*)GetFmtAttr(nLibreOfficeId);
>+
>+        if (pSourceFont && pDestFont)
>+        {
>+            //They're the same anyway, great, skip forcing
>+            bForceProperties = *pSourceFont != *pDestFont;
>+        }
>+        if (pSourceFont && bForceProperties)
>+        {
>+            SvxFontItem aForceFont(*pSourceFont);
>+            aForceFont.SetWhich(nLibreOfficeId);
>+            pCtrlStck->NewAttr(*pPaM->GetPoint(), aForceFont);
>+        }
>+        else
>+            bForceProperties = false;
>+    }
>+
>+    simpleAddTextToParagraph(rAddString);
>+
>+    if (bForceProperties)
>+        pCtrlStck->SetAttr(*pPaM->GetPoint(), nLibreOfficeId);
>+}
>+
>+//In writer we categorize text into CJK, CTL and "Western" for everything
> //else. Microsoft Word basically categorizes text into East Asian, Non-East
> //Asian and ASCII, with some shared characters and some properties to
> //to hint as to which way to bias those shared characters.
>@@ -2635,12 +2909,48 @@ bool SwWW8ImplReader::ReadPlainChars(WW8_CP& rPos, long nEnd, long nCpOfs)
> //we're then forced (because we don't have an equivalent hint) to mirror the
> //properties of the source MSWord category into the properties of the dest
> //Writer category for that range of text in order to get the right results.
>-bool SwWW8ImplReader::emulateMSWordAddTextToParagraph(const String& rAddString)
>+void SwWW8ImplReader::emulateMSWordAddTextToParagraph(const rtl::OUString& rAddString)
> {
>-    return simpleAddTextToParagraph(rAddString);
>+    if (!rAddString.getLength())
>+        return;
>+
>+    uno::Reference<i18n::XBreakIterator> xBI(pBreakIt->GetBreakIter());
>+    if (!xBI.is())
>+    {
>+        simpleAddTextToParagraph(rAddString);
>+        return;
>+    }
>+
>+    //Split the text into runs over which both the Writer script type and
>+    //the Word category stay constant, and add each run on its own
>+    rtl::OUStringBuffer sChunk;
>+    const sal_Unicode *pChar = rAddString.getStr();
>+    sal_Int32 i = 0;
>+    sal_uInt16 nLibreOfficeScript = xBI->getScriptType(rAddString, i++);
>+    MSScriptType nMSOfficeScript = categorizeCharByMSScript(*pChar);
>+    sChunk.append(*pChar++);
>+    while (i < rAddString.getLength())
>+    {
>+        sal_uInt16 nNextLibreOfficeScript = xBI->getScriptType(rAddString, i++);
>+        MSScriptType nNextMSOfficeScript = categorizeCharByMSScript(*pChar);
>+        if (
>+            sChunk.getLength() &&
>+            ((nNextLibreOfficeScript != nLibreOfficeScript) || (nNextMSOfficeScript != nMSOfficeScript))
>+           )
>+        {
>+            emulateMSWordAddChunkToParagraph(sChunk.makeStringAndClear(), nLibreOfficeScript, nMSOfficeScript);
>+        }
>+        sChunk.append(*pChar);
>+        nLibreOfficeScript = nNextLibreOfficeScript;
>+        nMSOfficeScript = nNextMSOfficeScript;
>+        ++pChar;
>+    }
>+
>+    if (sChunk.getLength())
>+        emulateMSWordAddChunkToParagraph(sChunk.makeStringAndClear(), nLibreOfficeScript, nMSOfficeScript);
> }
> 
>-bool SwWW8ImplReader::simpleAddTextToParagraph(const String& rAddString)
>+void SwWW8ImplReader::simpleAddTextToParagraph(const String& rAddString)
> {
>     const SwTxtNode* pNd = pPaM->GetCntntNode()->GetTxtNode();
>     if (rAddString.Len())
>@@ -2681,8 +2991,6 @@ bool SwWW8ImplReader::simpleAddTextToParagraph(const String& rAddString)
> 
>         bReadTable = false;
>     }
>-
>-    return true;
> }
> 
> // Returnwert: true for para end
>@@ -3348,7 +3656,7 @@ SwWW8ImplReader::SwWW8ImplReader(BYTE nVersionPara, SvStorage* pStorage,
>     m_bRegardHindiDigits( false ),
>     mbNewDoc(bNewDoc),
>     nDropCap(0),
>-    nIdctHint(0),
>+    nIdctHint(0xFF),
>     bBidi(false),
>     bReadTable(false)
> {
>diff --git a/sw/source/filter/ww8/ww8par.hxx b/sw/source/filter/ww8/ww8par.hxx
>index 4be96b7..c2d4993 100644
>--- a/sw/source/filter/ww8/ww8par.hxx
>+++ b/sw/source/filter/ww8/ww8par.hxx
>@@ -1117,8 +1117,10 @@ private:
>         pReffingStck = 0;
>     }
>     void DeleteAnchorStk() { DeleteStk( pAnchorStck ); pAnchorStck = 0; }
>-    bool emulateMSWordAddTextToParagraph(const String& sAddString);
>-    bool simpleAddTextToParagraph(const String& sAddString);
>+    void emulateMSWordAddTextToParagraph(const rtl::OUString& rAddString);
>+    void emulateMSWordAddChunkToParagraph(const rtl::OUString& rAddString,
>+        sal_uInt16 nLibreOfficeScript, sal_uInt16 nMSOfficeScript);
>+    void simpleAddTextToParagraph(const String& sAddString);
>     bool HandlePageBreakChar();
>     bool ReadChar(long nPosCp, long nCpOfs);
>     bool ReadPlainChars(WW8_CP& rPos, long nEnd, long nCpOfs);
>diff --git a/sw/source/filter/ww8/ww8par6.cxx b/sw/source/filter/ww8/ww8par6.cxx
>index a9cf3b0..6020d2a 100644
>--- a/sw/source/filter/ww8/ww8par6.cxx
>+++ b/sw/source/filter/ww8/ww8par6.cxx
>@@ -4312,7 +4312,7 @@ void SwWW8ImplReader::Read_UL( USHORT nId, const BYTE* pData, short nLen )
> void SwWW8ImplReader::Read_IdctHint( USHORT, const BYTE* pData, short nLen )
> {
>     if (nLen < 0)
>-        nIdctHint = 0;
>+        nIdctHint = 0xFF;
>     else
>         nIdctHint = *pData;
> }
You cannot view the attachment while viewing its details because your browser does not support IFRAMEs.
View the attachment on a separate page
.
View Attachment As Raw
Actions:
View
Attachments on
bug 34319
: 43719 |
43748