NISUS Archives

February 2011

NISUS@LISTSERV.DARTMOUTH.EDU

Options: Use Monospaced Font
Show Text Part by Default
Show All Mail Headers

Message: [<< First] [< Prev] [Next >] [Last >>]
Topic: [<< First] [< Prev] [Next >] [Last >>]
Author: [<< First] [< Prev] [Next >] [Last >>]

Print Reply
Subject:
From:
Reply To:
Date:
Wed, 9 Feb 2011 23:08:16 +0900
Content-Type:
text/plain
Parts/Attachments:
text/plain (116 lines)
I wrote this macro in order to facilitate the cleaning up of some imported doc files full of gremlins. By changing $destination option, it can be used, for example, to convert characters in a conversion table (hash) used by transliterateInRange command into hexadecimal values, with which the command works much faster than with real characters.
<http://www2.odn.ne.jp/alt-quinon/files/NWPro/convert/Selections2Hex_20110201_nwm.zip>


Kino

--
### Selections2Hex ###

# Convert characters in selection(s) into hexadecimal notations.

$destination = 'show'  # 'show' (in Exit dialog), 'replace' or 'clipboard'
$prefix = Cast to String '0x'  # customizable
$suffix = Cast to String ''  # customizable
$separator = Cast to String ', '  # customizable
$zeroPad = true  # true or false
$skipASCII = false  # true or false
$skippWhiteSpace = false  # true or false
$skippedWhiteSpaceChars = '[^\t\n\f\x20\x{3000}]'  # 0xA0 (no break space) is not skipped

Require Pro Version 1.3

$doc = Document.active
if $doc == undefined
	exit
end

$zeros = Array.newWithCount 5
$zeros[0] = Cast to String '000'  # 9 -> 0009
$zeros[1] = Cast to String '00'  # A0 -> 00A0
$zeros[2] = Cast to String '0'  # 101 -> 0101

$skipChars = Array.new

if $skipASCII == true
	$skipChars.appendValue '\p{^ASCII}'
end

if $skippWhiteSpace == true
	$skipChars.appendValue $skippedWhiteSpaceChars
end

if $skippWhiteSpace || $skipASCII
	$find = ''
	if $skippWhiteSpace && $skipASCII
		$find = $find.textByAppending '[', $skipChars.join('&&'), ']+'
	else
		$find = $skipChars.firstValue & '+'
	end
	Find All in Selection $find, 'E-i'
end

$sels = $doc.textSelections
if ! $sels.firstValue.length
	exit 'Nothing selected, exiting...'
end

if ! $destination.compare('replace', 'i')  # if $destination == 'replace' (case insensitive)
	Select Start
else
	$output = Array.new
end

foreach $sel in reversed $sels
	$notations = Array.new
	$i = 0
	while $i < $sel.substring.length
		$d = $sel.substring.characterAtIndex $i
		$isSurrogatePair = false
		if $d >= 0xD800  # high surrogate?
			if $d <= 0xDBFF
				if $sel.substring.rangeOfComposedCharacterAtIndex($i).length > 1
					if $sel.substring.characterAtIndex($i+1) >= 0xDC00
						if $sel.substring.characterAtIndex($i+1) <= 0xDFFF
							$isSurrogatePair = true
						end
					end
				end
			end
		end
		if $isSurrogatePair  # `man Encode::Unicode`: $uni = 0x10000+($hi-0xD800)*0x400+($lo-0xDC00)
			$d -= 0xD800  # $hi-0xD800
			$d = $d * 0x400  # ($hi-0xD800)*0x400
			$d += 0x2400  # ($hi-0xD800)*0x400+0x2400 [= 0x10000-0xDC00]
			$i += 1  # the next char (low surrogate)
			$d += $sel.substring.characterAtIndex $i  # ($hi-0xD800)*0x400+0x2400+$lo
		end
		$h = Convert To Hex $d
		if $zeroPad == true
			$h = $zeros[$h.length-1] & $h
		end
		$h = $prefix.textByAppending $h, $suffix
		$notations.appendValue $h
		$i += 1
	end
	$notations = $notations.join $separator
	if ! $destination.compare('replace', 'i')  # if $destination == 'replace' (case insensitive)
		$sel.text.replaceInRange $sel.range, $notations
	else
		$output.prependValue $notations
	end
end

if $destination.compare('replace', 'i')  # if $destination != 'replace' (case insensitive)
	$output = $output.join $separator
	if ! $destination.compare('show', 'i')  # if $destination == 'show' (case insensitive)
		exit $output
	elsif ! $destination.compare('clipboard', 'i')  # if $destination == 'clipboard' (case insensitive)
		Write Clipboard $output
	else
		exit "Unknown option \$destination = '$destination'"
	end
end

### end of macro ###

ATOM RSS1 RSS2