// Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. #include "pch.h" #include "CopyPasteManager.h" #include "Common/TraceLogger.h" #include "Common/LocalizationSettings.h" using namespace std; using namespace concurrency; using namespace CalculatorApp; using namespace CalculatorApp::Common; using namespace Platform; using namespace Platform::Collections; using namespace Windows::Foundation; using namespace Windows::System; using namespace Windows::ApplicationModel::DataTransfer; using namespace Windows::Foundation::Collections; StringReference PasteErrorString(L"NoOp"); static const wstring c_validBasicCharacterSet = L"0123456789+-.e"; static const wstring c_validStandardCharacterSet = c_validBasicCharacterSet + L"*/"; static const wstring c_validScientificCharacterSet = c_validStandardCharacterSet + L"()^%"; static const wstring c_validProgrammerCharacterSet = c_validStandardCharacterSet + L"()%abcdfABCDEF"; // The below values can not be "constexpr"-ed, // as both wstring_view and wchar[] can not be concatenated // [\s\x85] means white-space characters static const wstring c_wspc = L"[\\s\\x85]*"; static const wstring c_wspcLParens = c_wspc + L"[(]*" + c_wspc; static const wstring c_wspcLParenSigned = c_wspc + L"([-+]?[(])*" + c_wspc; static const wstring c_wspcRParens = c_wspc + L"[)]*" + c_wspc; static const wstring c_signedDecFloat = L"(?:[-+]?(?:\\d+(\\.\\d*)?|\\.\\d+))"; static const wstring c_optionalENotation = L"(?:e[+-]?\\d+)?"; // Programmer Mode Integer patterns // Support digit separators ` (WinDbg/MASM), ' (C++), and _ (C# and other languages) static const wstring c_hexProgrammerChars = L"([a-f]|[A-F]|\\d)+((_|'|`)([a-f]|[A-F]|\\d)+)*"; static const wstring c_decProgrammerChars = L"\\d+((_|'|`)\\d+)*"; static const wstring c_octProgrammerChars = L"[0-7]+((_|'|`)[0-7]+)*"; static const wstring c_binProgrammerChars = L"[0-1]+((_|'|`)[0-1]+)*"; static const wstring c_uIntSuffixes = L"[uU]?[lL]{0,2}"; // RegEx Patterns used by various modes static const array standardModePatterns = { wregex(c_wspc + c_signedDecFloat + c_optionalENotation + c_wspc) }; static const array scientificModePatterns = { wregex( L"(" + c_wspc + L"[-+]?)|(" + c_wspcLParenSigned + L")" + c_signedDecFloat + c_optionalENotation + c_wspcRParens) }; static const array, 4> programmerModePatterns = { { // Hex numbers like 5F, 4A0C, 0xa9, 0xFFull, 47CDh { wregex(c_wspcLParens + L"(0[xX])?" + c_hexProgrammerChars + c_uIntSuffixes + c_wspcRParens), wregex(c_wspcLParens + c_hexProgrammerChars + L"[hH]?" + c_wspcRParens) }, // Decimal numbers like -145, 145, 0n145, 123ull etc { wregex(c_wspcLParens + L"[-+]?" + c_decProgrammerChars + L"[lL]{0,2}" + c_wspcRParens), wregex(c_wspcLParens + L"(0[nN])?" + c_decProgrammerChars + c_uIntSuffixes + c_wspcRParens) }, // Octal numbers like 06, 010, 0t77, 0o77, 077ull etc { wregex(c_wspcLParens + L"(0[otOT])?" + c_octProgrammerChars + c_uIntSuffixes + c_wspcRParens) }, // Binary numbers like 011010110, 0010110, 10101001, 1001b, 0b1001, 0y1001, 0b1001ull { wregex(c_wspcLParens + L"(0[byBY])?" + c_binProgrammerChars + c_uIntSuffixes + c_wspcRParens), wregex(c_wspcLParens + c_binProgrammerChars + L"[bB]?" + c_wspcRParens) } } }; static const array unitConverterPatterns = { wregex(c_wspc + c_signedDecFloat + c_wspc) }; void CopyPasteManager::CopyToClipboard(String ^ stringToCopy) { // Copy the string to the clipboard auto dataPackage = ref new DataPackage(); dataPackage->SetText(stringToCopy); Clipboard::SetContent(dataPackage); } IAsyncOperation ^ CopyPasteManager::GetStringToPaste(ViewMode mode, CategoryGroupType modeType, NumberBase programmerNumberBase, BitLength bitLengthType) { // Retrieve the text in the clipboard auto dataPackageView = Clipboard::GetContent(); // TODO: Support all formats supported by ClipboardHasText //-- add support to avoid pasting of expressions like 12 34(as of now we allow 1234) //-- add support to allow pasting for expressions like .2 , -.2 //-- add support to allow pasting for expressions like 1.3e12(as of now we allow 1.3e+12) return create_async([dataPackageView, mode, modeType, programmerNumberBase, bitLengthType] { return create_task(dataPackageView->GetTextAsync(::StandardDataFormats::Text)) .then( [mode, modeType, programmerNumberBase, bitLengthType](String ^ pastedText) { return ValidatePasteExpression(pastedText, mode, modeType, programmerNumberBase, bitLengthType); }, task_continuation_context::use_arbitrary()); }); } bool CopyPasteManager::HasStringToPaste() { return Clipboard::GetContent()->Contains(StandardDataFormats::Text); } String ^ CopyPasteManager::ValidatePasteExpression(String ^ pastedText, ViewMode mode, NumberBase programmerNumberBase, BitLength bitLengthType) { return ValidatePasteExpression(pastedText, mode, NavCategory::GetGroupType(mode), programmerNumberBase, bitLengthType); } // return "NoOp" if pastedText is invalid else return pastedText String ^ CopyPasteManager::ValidatePasteExpression( String ^ pastedText, ViewMode mode, CategoryGroupType modeType, NumberBase programmerNumberBase, BitLength bitLengthType) { if (pastedText->Length() > MaxPasteableLength) { // return NoOp to indicate don't paste anything. TraceLogger::GetInstance()->LogError(mode, L"CopyPasteManager::ValidatePasteExpression", L"PastedExpressionSizeGreaterThanMaxAllowed"); return PasteErrorString; } // Get english translated expression String ^ englishString = LocalizationSettings::GetInstance().GetEnglishValueFromLocalizedDigits(pastedText); // Removing the spaces, comma separator from the pasteExpression to allow pasting of expressions like 1 + 2+1,333 auto pasteExpression = wstring(RemoveUnwantedCharsFromString(englishString)->Data()); // If the last character is an = sign, remove it from the pasteExpression to allow evaluating the result on paste. if (!pasteExpression.empty() && pasteExpression.back() == L'=') { pasteExpression = pasteExpression.substr(0, pasteExpression.length() - 1); } // Extract operands from the expression to make regex comparison easy and quick. For whole expression it was taking too much of time. // Operands vector will have the list of operands in the pasteExpression auto operands = ExtractOperands(StringReference(pasteExpression.c_str()), mode); if (operands->Size == 0) { // return NoOp to indicate don't paste anything. return PasteErrorString; } if (modeType == CategoryGroupType::Converter) { operands->Clear(); operands->Append(ref new String(pasteExpression.c_str())); } // validate each operand with patterns for different modes if (!ExpressionRegExMatch(operands, mode, modeType, programmerNumberBase, bitLengthType)) { TraceLogger::GetInstance()->LogError(mode, L"CopyPasteManager::ValidatePasteExpression", L"InvalidExpressionForPresentMode"); return PasteErrorString; } return pastedText; } IVector ^ CopyPasteManager::ExtractOperands(Platform::String ^ pasteExpression, ViewMode mode) { auto operands = ref new Vector(); size_t lastIndex = 0; bool haveOperator = false; bool startExpCounting = false; bool startOfExpression = true; bool isPreviousOpenParen = false; bool isPreviousOperator = false; wstring validCharacterSet; switch (mode) { case ViewMode::Standard: validCharacterSet = c_validStandardCharacterSet; break; case ViewMode::Scientific: validCharacterSet = c_validScientificCharacterSet; break; case ViewMode::Programmer: validCharacterSet = c_validProgrammerCharacterSet; break; default: validCharacterSet = c_validBasicCharacterSet; } // This will have the exponent length size_t expLength = 0; int i = -1; for (auto currentChar : pasteExpression) { ++i; // if the current character is not a valid one don't process it if (validCharacterSet.find(currentChar) == wstring_view::npos) { continue; } if (operands->Size >= MaxOperandCount) { TraceLogger::GetInstance()->LogError(mode, L"CopyPasteManager::ExtractOperands", L"OperandCountGreaterThanMaxCount"); operands->Clear(); return operands; } if (currentChar >= L'0' && currentChar <= L'9') { if (startExpCounting) { expLength++; // to disallow pasting of 1e+12345 as 1e+1234, max exponent that can be pasted is 9999. if (expLength > MaxExponentLength) { TraceLogger::GetInstance()->LogError(mode, L"CopyPasteManager::ExtractOperands", L"ExponentLengthGreaterThanMaxLength"); operands->Clear(); return operands; } } isPreviousOperator = false; } else if (currentChar == L'e') { if (mode != ViewMode::Programmer) { startExpCounting = true; } isPreviousOperator = false; } else if (currentChar == L'+' || currentChar == L'-' || currentChar == L'*' || currentChar == L'/' || currentChar == L'^' || currentChar == L'%') { if (currentChar == L'+' || currentChar == L'-') { // don't break the expression into operands if the encountered character corresponds to sign command(+-) if (isPreviousOpenParen || startOfExpression || isPreviousOperator || ((mode != ViewMode::Programmer) && !((i != 0) && pasteExpression->Data()[i - 1] != L'e'))) { isPreviousOperator = false; continue; } } startExpCounting = false; expLength = 0; haveOperator = true; isPreviousOperator = true; operands->Append(ref new String(wstring(pasteExpression->Data()).substr(lastIndex, i - lastIndex).c_str())); lastIndex = i + 1; } else { isPreviousOperator = false; } isPreviousOpenParen = (currentChar == L'('); startOfExpression = false; } if (!haveOperator) { operands->Clear(); operands->Append(pasteExpression); } else { operands->Append(ref new String(wstring(pasteExpression->Data()).substr(lastIndex, pasteExpression->Length() - 1).c_str())); } return operands; } bool CopyPasteManager::ExpressionRegExMatch( IVector ^ operands, ViewMode mode, CategoryGroupType modeType, NumberBase programmerNumberBase, BitLength bitLengthType) { if (operands->Size == 0) { return false; } vector patterns{}; if (mode == ViewMode::Standard) { patterns.assign(standardModePatterns.begin(), standardModePatterns.end()); } else if (mode == ViewMode::Scientific) { patterns.assign(scientificModePatterns.begin(), scientificModePatterns.end()); } else if (mode == ViewMode::Programmer) { patterns.assign( programmerModePatterns[(int)programmerNumberBase - (int)NumberBase::HexBase].begin(), programmerModePatterns[(int)programmerNumberBase - (int)NumberBase::HexBase].end()); } else if (modeType == CategoryGroupType::Converter) { patterns.assign(unitConverterPatterns.begin(), unitConverterPatterns.end()); } auto maxOperandLengthAndValue = GetMaxOperandLengthAndValue(mode, modeType, programmerNumberBase, bitLengthType); bool expMatched = true; for (const auto& operand : operands) { // Each operand only needs to match one of the available patterns. bool operandMatched = false; for (const auto& pattern : patterns) { operandMatched = operandMatched || regex_match(operand->Data(), pattern); } if (operandMatched) { // Remove characters that are valid in the expression but we do not want to include in length calculations // or which will break conversion from string-to-ULL. auto operandValue = SanitizeOperand(operand); // If an operand exceeds the maximum length allowed, break and return. if (OperandLength(operandValue, mode, modeType, programmerNumberBase) > maxOperandLengthAndValue.maxLength) { expMatched = false; break; } // If maxOperandValue is set and the operandValue exceeds it, break and return. if (maxOperandLengthAndValue.maxValue != 0) { auto operandAsULL = TryOperandToULL(operandValue, programmerNumberBase); if (operandAsULL == nullptr) { // Operand was empty, received invalid_argument, or received out_of_range. Input is invalid. expMatched = false; break; } if (operandAsULL->Value > maxOperandLengthAndValue.maxValue) { expMatched = false; break; } } } expMatched = expMatched && operandMatched; } return expMatched; } CopyPasteMaxOperandLengthAndValue CopyPasteManager::GetMaxOperandLengthAndValue(ViewMode mode, CategoryGroupType modeType, NumberBase programmerNumberBase, BitLength bitLengthType) { constexpr size_t defaultMaxOperandLength = 0; constexpr uint64_t defaultMaxValue = 0; CopyPasteMaxOperandLengthAndValue res; if (mode == ViewMode::Standard) { res.maxLength = MaxStandardOperandLength; res.maxValue = defaultMaxValue; return res; } else if (mode == ViewMode::Scientific) { res.maxLength = MaxScientificOperandLength; res.maxValue = defaultMaxValue; return res; } else if (mode == ViewMode::Programmer) { unsigned int bitLength = 0; switch (bitLengthType) { case BitLength::BitLengthQWord: bitLength = 64; break; case BitLength::BitLengthDWord: bitLength = 32; break; case BitLength::BitLengthWord: bitLength = 16; break; case BitLength::BitLengthByte: bitLength = 8; break; } double bitsPerDigit = 0; switch (programmerNumberBase) { case NumberBase::BinBase: bitsPerDigit = log2(2); break; case NumberBase::OctBase: bitsPerDigit = log2(8); break; case NumberBase::DecBase: bitsPerDigit = log2(10); break; case NumberBase::HexBase: bitsPerDigit = log2(16); break; } unsigned int signBit = (programmerNumberBase == NumberBase::DecBase) ? 1 : 0; const auto maxLength = static_cast(ceil((bitLength - signBit) / bitsPerDigit)); const uint64_t maxValue = UINT64_MAX >> (MaxProgrammerBitLength - (bitLength - signBit)); res.maxLength = maxLength; res.maxValue = maxValue; return res; } else if (modeType == CategoryGroupType::Converter) { res.maxLength = MaxConverterInputLength; res.maxValue = defaultMaxValue; return res; } res.maxLength = defaultMaxOperandLength; res.maxValue = defaultMaxValue; return res; } Platform::String ^ CopyPasteManager::SanitizeOperand(Platform::String ^ operand) { constexpr wchar_t unWantedChars[] = { L'\'', L'_', L'`', L'(', L')', L'-', L'+' }; return ref new String(Utils::RemoveUnwantedCharsFromString(operand->Data(), unWantedChars).c_str()); } IBox ^ CopyPasteManager::TryOperandToULL(String ^ operand, NumberBase numberBase) { if (operand->Length() == 0 || operand->Data()[0] == L'-') { return nullptr; } int intBase; switch (numberBase) { case NumberBase::HexBase: intBase = 16; break; case NumberBase::OctBase: intBase = 8; break; case NumberBase::BinBase: intBase = 2; break; default: case NumberBase::DecBase: intBase = 10; break; } wstring::size_type size = 0; try { return stoull(operand->Data(), &size, intBase); } catch (const invalid_argument&) { // Do nothing } catch (const out_of_range&) { // Do nothing } return nullptr; } ULONG32 CopyPasteManager::OperandLength(Platform::String ^ operand, ViewMode mode, CategoryGroupType modeType, NumberBase programmerNumberBase) { if (modeType == CategoryGroupType::Converter) { return operand->Length(); } switch (mode) { case ViewMode::Standard: case ViewMode::Scientific: return StandardScientificOperandLength(operand); case ViewMode::Programmer: return ProgrammerOperandLength(operand, programmerNumberBase); default: return 0; } } ULONG32 CopyPasteManager::StandardScientificOperandLength(Platform::String ^ operand) { auto operandWstring = wstring(operand->Data()); const bool hasDecimal = operandWstring.find('.') != wstring::npos; if (hasDecimal) { if (operandWstring.length() >= 2) { if ((operandWstring[0] == L'0') && (operandWstring[1] == L'.')) { return static_cast(operandWstring.length() - 2); } else { return static_cast(operandWstring.length() - 1); } } } return static_cast(operandWstring.length()); } ULONG32 CopyPasteManager::ProgrammerOperandLength(Platform::String ^ operand, NumberBase numberBase) { vector prefixes{}; vector suffixes{}; switch (numberBase) { case NumberBase::BinBase: prefixes = { L"0B", L"0Y" }; suffixes = { L"B" }; break; case NumberBase::DecBase: prefixes = { L"-", L"0N" }; break; case NumberBase::OctBase: prefixes = { L"0T", L"0O" }; break; case NumberBase::HexBase: prefixes = { L"0X" }; suffixes = { L"H" }; break; default: // No defined prefixes/suffixes return 0; } // UInt suffixes are common across all modes const array uintSuffixes = { L"ULL", L"UL", L"LL", L"U", L"L" }; suffixes.insert(suffixes.end(), uintSuffixes.begin(), uintSuffixes.end()); wstring operandUpper = wstring(operand->Data()); transform(operandUpper.begin(), operandUpper.end(), operandUpper.begin(), towupper); size_t len = operand->Length(); // Detect if there is a suffix and subtract its length // Check suffixes first to allow e.g. "0b" to result in length 1 (value 0), rather than length 0 (no value). for (const auto& suffix : suffixes) { if (len < suffix.length()) { continue; } if (operandUpper.compare(operandUpper.length() - suffix.length(), suffix.length(), suffix) == 0) { len -= suffix.length(); break; } } // Detect if there is a prefix and subtract its length for (const auto& prefix : prefixes) { if (len < prefix.length()) { continue; } if (operandUpper.compare(0, prefix.length(), prefix) == 0) { len -= prefix.length(); break; } } return static_cast(len); } // return wstring after removing characters like space, comma, double quotes, and monetary prefix currency symbols supported by the Windows keyboard: // yen or yuan(¥) - 165 // unspecified currency sign(¤) - 164 // Ghanaian cedi(₵) - 8373 // dollar or peso($) - 36 // colón(₡) - 8353 // won(₩) - 8361 // shekel(₪) - 8362 // naira(₦) - 8358 // Indian rupee(₹) - 8377 // pound(£) - 163 // euro(€) - 8364 Platform::String ^ CopyPasteManager::RemoveUnwantedCharsFromString(Platform::String ^ input) { constexpr wchar_t unWantedChars[] = { L' ', L',', L'"', 165, 164, 8373, 36, 8353, 8361, 8362, 8358, 8377, 163, 8364, 8234, 8235, 8236, 8237 }; return ref new String(Utils::RemoveUnwantedCharsFromString(input->Data(), unWantedChars).c_str()); } bool CopyPasteManager::IsErrorMessage(Platform::String ^ message) { return message == PasteErrorString; }