path: root/scite/debian/emitUnicode.lua
diff options
authorB. Stack <>2022-04-05 14:00:15 -0400
committerB. Stack <>2022-04-05 14:00:15 -0400
commiteedac700b10ab9e5900708ff06a09e881317f6eb (patch)
treea58b932c0336f100baea712d621fbb8ea9d9c40c /scite/debian/emitUnicode.lua
parentscite 5.2.2 with emitunicode patch, for fedora (diff)
scite: add devuan dpkg
Diffstat (limited to 'scite/debian/emitUnicode.lua')
1 files changed, 247 insertions, 0 deletions
diff --git a/scite/debian/emitUnicode.lua b/scite/debian/emitUnicode.lua
new file mode 100644
index 0000000..70d32b1
--- /dev/null
+++ b/scite/debian/emitUnicode.lua
@@ -0,0 +1,247 @@
+-- Unicode hexadecimal table:
+-- Source:
+-- Future reference:
+-- This Lua script adds UTF-8 Unicode input to the SciTE text editor.
+-- The SciTE text editor should be set to use the UTF-8 encoding,
+-- because this script inserts UTF-8 bytes into the text buffer of the
+-- SciTE editor. Select File->Encoding->UTF-8 from the
+-- menu bar of SciTE.
+-- For example, you can type 2200 followed by CTRL+U,
+-- and 2200 is replaced with ∀ (U+2200) in the SciTE editor.
+-- ______________________________________________________________________________
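+-- To have SciTE run this script each time you press Ctrl+U, add the next lines
+-- to your ~/.SciTEUser.properties file, where ~ is your home directory.
+-- FILE ~/.SciTEUser.properties
+-- ext.lua.startup.script=$(SciteUserHome)/emitUtf8UnicodeIntoTheSciteEditor.lua
+-- command.name.8.*=Emit UTF8 Unicode
+-- command.subsystem.8.*=3
+-- command.8.*=emitUtf8UnicodeIntoTheSciteEditor
+-- command.shortcut.8.*=Ctrl+U
+-- (The command slot number 8 is only an example; use any free slot.)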
+-- ______________________________________________________________________________
+-- Next are the definitions of the Lua functions that SciTE calls
+-- when CTRL+U is pressed, to replace the hexadecimal number of a Unicode
+-- code point with the UTF-8 encoding of that code point.
+-- ______________________________________________________________________________
+-- Computes the utf8 encoding for a unicode codepoint u
+-- , when 0 <= u <= 0x7f (one-byte form 0xxxxxxx).
+-- The range is not checked here: only the low 7 bits of u are used.
+-- @param unicodeValue the unicode codepoint u
+-- @return the utf8 encoding of the unicode codepoint u (a 1-byte string)
+function case1UnicodeToUtf8(unicodeValue)
+  -- For 0 <= u <= 0x7f the remainder is u itself.
+  local byte0 = unicodeValue % 0x80
+  return string.char(byte0)
+end
+-- ______________________________________________________________________________
+-- Computes the utf8 encoding for a unicode codepoint u
+-- , when 0x80 <= u <= 0x7ff (two-byte form 110xxxxx 10xxxxxx).
+-- The range is not checked here: only the low 11 bits of u are used.
+-- @param unicodeValue the unicode codepoint u
+-- @return the utf8 encoding of the unicode codepoint u (a 2-byte string)
+function case2UnicodeToUtf8(unicodeValue)
+  local u = unicodeValue
+  -- Continuation byte: marker 10xxxxxx plus the low 6 bits.
+  local byte1 = 0x80 + (u % 0x40)
+  u = math.floor(u / 0x40)
+  -- Lead byte: marker 110xxxxx plus the next 5 bits.
+  local byte0 = 0xc0 + (u % 0x20)
+  return string.char(byte0, byte1)
+end
+-- ______________________________________________________________________________
+-- Computes the utf8 encoding for a unicode codepoint u
+-- , when 0x800 <= u <= 0xffff (three-byte form 1110xxxx 10xxxxxx 10xxxxxx).
+-- The range is not checked here: only the low 16 bits of u are used.
+-- @param unicodeValue the unicode codepoint u
+-- @return the utf8 encoding of the unicode codepoint u (a 3-byte string)
+function case3UnicodeToUtf8(unicodeValue)
+  local u = unicodeValue
+  -- Last continuation byte: low 6 bits.
+  local byte2 = 0x80 + (u % 0x40)
+  u = math.floor(u / 0x40)
+  -- First continuation byte: next 6 bits.
+  local byte1 = 0x80 + (u % 0x40)
+  u = math.floor(u / 0x40)
+  -- Lead byte: marker 1110xxxx plus the remaining 4 bits.
+  local byte0 = 0xe0 + (u % 0x10)
+  return string.char(byte0, byte1, byte2)
+end
+-- ______________________________________________________________________________
+-- Computes the utf8 encoding for a unicode codepoint u
+-- , when 0x10000 <= u <= 0x10ffff
+-- (four-byte form 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx).
+-- The range is not checked here: only the low 21 bits of u are used.
+-- @param unicodeValue the unicode codepoint u
+-- @return the utf8 encoding of the unicode codepoint u (a 4-byte string)
+function case4UnicodeToUtf8(unicodeValue)
+  local u = unicodeValue
+  -- Three continuation bytes, each holding 6 bits, lowest bits last.
+  local byte3 = 0x80 + (u % 0x40)
+  u = math.floor(u / 0x40)
+  local byte2 = 0x80 + (u % 0x40)
+  u = math.floor(u / 0x40)
+  local byte1 = 0x80 + (u % 0x40)
+  u = math.floor(u / 0x40)
+  -- Lead byte: marker 11110xxx plus the remaining 3 bits.
+  local byte0 = 0xf0 + (u % 0x8)
+  return string.char(byte0, byte1, byte2, byte3)
+end
+-- ______________________________________________________________________________
+-- Converts a unicode integer value, into a utf8 string value.
+-- The unicode integer value is an integer that
+-- is greater than or equal to zero.
+-- The utf8 string value is a string that is a sequence of
+-- 8 bits characters that give the utf8 encoding of the
+-- unicode codepoint given by the unicode integer value.
+-- Returns nil when the value cannot be encoded: not a number, not an
+-- integer, negative, greater than 0x10ffff, or a surrogate
+-- (0xd800 <= u <= 0xdfff), which utf8 does not allow.
+-- @param unicodeValue the unicode integer value;
+-- a unicode codepoint
+-- @return the utf8 encoding of the unicode codepoint
+-- provided by the unicodeValue input argument, or nil
+function unicodeToUtf8(unicodeValue)
+  local u = unicodeValue
+  if type(u) ~= "number" or u ~= math.floor(u) then
+    return nil
+  end
+  if u < 0x0 or u > 0x10ffff then
+    return nil
+  end
+  -- The ranges are tested in ascending order, so each test only needs
+  -- the upper bound of its range.
+  if u <= 0x7f then
+    return case1UnicodeToUtf8(u)
+  end
+  if u <= 0x7ff then
+    return case2UnicodeToUtf8(u)
+  end
+  if u >= 0xd800 and u <= 0xdfff then
+    return nil -- Surrogate: not a character, must not be encoded.
+  end
+  if u <= 0xffff then
+    return case3UnicodeToUtf8(u)
+  end
+  return case4UnicodeToUtf8(u)
+end
+-- ______________________________________________________________________________
+-- Peeks (reads) the character at position i, in the Scite Editor.
+-- If the character is the ascii name of a hex digit, it returns
+-- the corresponding hex digit, otherwise it returns nil.
+-- The character is read from the global `editor` object (editor.CharAt).
+-- @param i position in the Scite Editor
+-- @return hex digit at position i (a number from 0 to 15), or nil
+function peekHexdigit(i)
+  local asciiCode = editor.CharAt[i]
+  -- A missing value, or a value outside 0..0xff (for example a byte
+  -- reported as a negative number), cannot be an ascii hex digit.
+  if (asciiCode == nil) or (asciiCode < 0) or (asciiCode > 0xff) then
+    return nil
+  end
+  -- tonumber returns nil when the character is not a base 16 digit.
+  local hexDigit = tonumber(string.char(asciiCode), 0x10)
+  return hexDigit
+end
+-- ______________________________________________________________________________
+-- Reads the sequence of length at most 5, at the left of the cursor
+-- in the Scite Editor.
+-- Encodes the longest suffix of this sequence, that is a hex number, into
+-- the utf8 encoding of this hex number.
+-- Replaces this longest suffix, with the utf8 sequence.
+-- NOTE: with at most 5 hex digits, the largest reachable codepoint is
+-- 0xfffff; codepoints 0x100000 .. 0x10ffff cannot be typed.
+-- @return true when a suffix of length greater than zero, at most 5,
+-- existed and was replaced with the utf8 encoding of the
+-- number it represented
+-- nil when there was nothing to replace, or the number could
+-- not be encoded in utf8
+function emitUtf8Unicode()
+  local e = editor
+  local textLength = e.TextLength
+  local cursor = e.CurrentPos
+  local maxlen = 5
+  if (textLength == 0) or (cursor < 1) then
+    return nil -- Nothing at the left of the cursor.
+  end
+  local u = 0
+  local thePower = 1 -- Weight of the next digit: 16 ^ digitCount.
+  local digitCount = 0
+  -- Walk leftwards from the cursor; the digit nearest to the cursor is
+  -- the least significant one.
+  while (digitCount < maxlen) and (cursor - (digitCount + 1) >= 0) do
+    local hexDigit = peekHexdigit(cursor - (digitCount + 1))
+    if hexDigit == nil then
+      break -- The character is not a hex digit: the suffix ends here.
+    end
+    u = u + (thePower * hexDigit)
+    thePower = 0x10 * thePower
+    digitCount = digitCount + 1
+  end
+  if digitCount == 0 then
+    return nil -- Failure. No hex number at the left of the cursor.
+  end
+  -- Local on purpose: a global named `utf8` would overwrite the standard
+  -- utf8 library of Lua 5.3 and later.
+  local encoded = unicodeToUtf8(u)
+  if encoded == nil then
+    return nil -- Failure. Unicode to utf8 conversion failed.
+  end
+  e:SetSel(cursor - digitCount, cursor)
+  e:ReplaceSel(encoded)
+  return true -- Success.
+end
+-- ______________________________________________________________________________
+-- Emits utf8 encoding in the place of the unicode codepoint
+-- in the editor, at the left of the cursor.
+-- This is the function meant to be bound to CTRL+U.
+-- When nothing was replaced, it stays silent: the message for the
+-- Output pane is kept below as a comment, and can be enabled for debugging.
+function emitUtf8UnicodeIntoTheSciteEditor()
+  local ok = emitUtf8Unicode()
+  if not ok then
+    --print("Failed to encode unicode into text editor.")
+  end
+end
+-- ______________________________________________________________________________
+-- The following web pages were useful in writing this Lua SciTE script.