punycode-2.0/000755 000765 000024 00000000000 12072104303 014165 5ustar00litherumstaff000000 000000 punycode-2.0/Data/000755 000765 000024 00000000000 12072104303 015036 5ustar00litherumstaff000000 000000 punycode-2.0/LICENSE000644 000765 000024 00000002532 12072104303 015174 0ustar00litherumstaff000000 000000 The following license covers this documentation, and the source code, except where otherwise indicated. Copyright 2012, Myles C. Maxfield. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. punycode-2.0/punycode.cabal000644 000765 000024 00000002737 12072104303 017010 0ustar00litherumstaff000000 000000 name: punycode version: 2.0 license: BSD3 license-file: LICENSE author: Myles C. Maxfield maintainer: Myles C. Maxfield synopsis: Encode unicode strings to ascii forms according to RFC 3492 description: Encode unicode strings to ascii forms according to RFC 3492. It is written in pure Haskell, as opposed to gnuidn's Data.Text.IDN.Punycode. Please note that Data.Encoding.BootString from the 'encoding' package also contains an implementation of the Punycode algorithm. category: Text, Web stability: Experimental cabal-version: >= 1.8 build-type: Simple homepage: https://github.com/litherum/punycode library build-depends: base >= 4 && < 5 , bytestring , cereal , mtl , text exposed-modules: Data.Text.Punycode other-modules: Data.Text.Punycode.Encode , Data.Text.Punycode.Decode , Data.Text.Punycode.Shared ghc-options: -Wall Test-Suite test-punycode type: exitcode-stdio-1.0 main-is: Test/Main.hs build-depends: base >= 4 && < 5 , bytestring , cereal , mtl , text , HUnit , QuickCheck , encoding source-repository head type: git location: git://github.com/litherum/punycode.git punycode-2.0/Setup.lhs000644 000765 000024 00000000116 12072104303 015773 0ustar00litherumstaff000000 000000 #! /usr/bin/env runhaskell > import Distribution.Simple > main = defaultMain punycode-2.0/Test/000755 000765 000024 00000000000 12072104303 015104 5ustar00litherumstaff000000 000000 punycode-2.0/Test/Main.hs000644 000765 000024 00000016245 12072104303 016334 0ustar00litherumstaff000000 000000 import qualified Data.ByteString as BS import Data.Char import qualified Data.Encoding as E import qualified Data.Encoding.BootString as EB import qualified Data.Text as T import Data.Word (Word8) import System.Exit import Test.HUnit import Test.QuickCheck import Data.Text.Punycode tests = [( [0x0644, 0x064A, 0x0647, 0x0645, 0x0627, 0x0628, 0x062A, 0x0643, 0x0644, 0x0645, 0x0648, 0x0634, 0x0639, 0x0631, 0x0628, 0x064A, 0x061F] , "egbpdaj6bu4bxfgehfvwxn" , "A" ), ( [0x4ED6, 0x4EEC, 0x4E3A, 0x4EC0, 0x4E48, 0x4E0D, 0x8BF4, 0x4E2D, 0x6587] , "ihqwcrb4cv8a8dqg056pqjye" , "B" ), ( [0x4ED6, 0x5011, 0x7232, 0x4EC0, 0x9EBD, 0x4E0D, 0x8AAA, 0x4E2D, 0x6587] , "ihqwctvzc91f659drss3x8bo0yb" , "C" ), ( [0x0050, 0x0072, 0x006F, 0x010D, 0x0070, 0x0072, 0x006F, 0x0073, 0x0074, 0x011B, 0x006E, 0x0065, 0x006D, 0x006C, 0x0075, 0x0076, 0x00ED, 0x010D, 0x0065, 0x0073, 0x006B, 0x0079] , "Proprostnemluvesky-uyb24dma41a" , "D" ), ( [0x05DC, 0x05DE, 0x05D4, 0x05D4, 0x05DD, 0x05E4, 0x05E9, 0x05D5, 0x05D8, 0x05DC, 0x05D0, 0x05DE, 0x05D3, 0x05D1, 0x05E8, 0x05D9, 0x05DD, 0x05E2, 0x05D1, 0x05E8, 0x05D9, 0x05EA] , "4dbcagdahymbxekheh6e0a7fei0b" , "E" ), ( [0x092F, 0x0939, 0x0932, 0x094B, 0x0917, 0x0939, 0x093F, 0x0928, 0x094D, 0x0926, 0x0940, 0x0915, 0x094D, 0x092F, 0x094B, 0x0902, 0x0928, 0x0939, 0x0940, 0x0902, 0x092C, 0x094B, 0x0932, 0x0938, 0x0915, 0x0924, 0x0947, 0x0939, 0x0948, 0x0902] , "i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd" , "F" ), ( [0x306A, 0x305C, 0x307F, 0x3093, 0x306A, 0x65E5, 0x672C, 0x8A9E, 0x3092, 0x8A71, 0x3057, 0x3066, 0x304F, 0x308C, 0x306A, 0x3044, 0x306E, 0x304B] , "n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa" , "G" ), ( [0xC138, 0xACC4, 0xC758, 0xBAA8, 0xB4E0, 0xC0AC, 0xB78C, 0xB4E4, 0xC774, 0xD55C, 0xAD6D, 0xC5B4, 0xB97C, 0xC774, 0xD574, 0xD55C, 0xB2E4, 0xBA74, 0xC5BC, 0xB9C8, 0xB098, 0xC88B, 0xC744, 0xAE4C] , "989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c" , "H" ), ( [0x043F, 0x043E, 0x0447, 0x0435, 0x043C, 0x0443, 0x0436, 0x0435, 0x043E, 0x043D, 0x0438, 0x043D, 0x0435, 0x0433, 0x043E, 0x0432, 0x043E, 0x0440, 0x044F, 0x0442, 0x043F, 0x043E, 0x0440, 0x0443, 0x0441, 0x0441, 0x043A, 0x0438] , "b1abfaaepdrnnbgefbaDotcwatmq2g4l" , "I" ), ( [0x0050, 0x006F, 0x0072, 0x0071, 0x0075, 0x00E9, 0x006E, 0x006F, 0x0070, 0x0075, 0x0065, 0x0064, 0x0065, 0x006E, 0x0073, 0x0069, 0x006D, 0x0070, 0x006C, 0x0065, 0x006D, 0x0065, 0x006E, 0x0074, 0x0065, 0x0068, 0x0061, 0x0062, 0x006C, 0x0061, 0x0072, 0x0065, 0x006E, 0x0045, 0x0073, 0x0070, 0x0061, 0x00F1, 0x006F, 0x006C] , "PorqunopuedensimplementehablarenEspaol-fmd56a" , "J" ), ( [0x0054, 0x1EA1, 0x0069, 0x0073, 0x0061, 0x006F, 0x0068, 0x1ECD, 0x006B, 0x0068, 0x00F4, 0x006E, 0x0067, 0x0074, 0x0068, 0x1EC3, 0x0063, 0x0068, 0x1EC9, 0x006E, 0x00F3, 0x0069, 0x0074, 0x0069, 0x1EBF, 0x006E, 0x0067, 0x0056, 0x0069, 0x1EC7, 0x0074] , "TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g" , "K" ), ( [0x0033, 0x5E74, 0x0042, 0x7D44, 0x91D1, 0x516B, 0x5148, 0x751F] , "3B-ww4c5e180e575a65lsy2b" , "L" ), ( [0x5B89, 0x5BA4, 0x5948, 0x7F8E, 0x6075, 0x002D, 0x0077, 0x0069, 0x0074, 0x0068, 0x002D, 0x0053, 0x0055, 0x0050, 0x0045, 0x0052, 0x002D, 0x004D, 0x004F, 0x004E, 0x004B, 0x0045, 0x0059, 0x0053] , "-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n" , "M" ), ( [0x0048, 0x0065, 0x006C, 0x006C, 0x006F, 0x002D, 0x0041, 0x006E, 0x006F, 0x0074, 0x0068, 0x0065, 0x0072, 0x002D, 0x0057, 0x0061, 0x0079, 0x002D, 0x305D, 0x308C, 0x305E, 0x308C, 0x306E, 0x5834, 0x6240] , "Hello-Another-Way--fc4qua05auwb3674vfr0b" , "N" ), ( [0x3072, 0x3068, 0x3064, 0x5C4B, 0x6839, 0x306E, 0x4E0B, 0x0032] , "2-u9tlzr9756bt3uc0v" , "O" ), ( [0x004D, 0x0061, 0x006A, 0x0069, 0x3067, 0x004B, 0x006F, 0x0069, 0x3059, 0x308B, 0x0035, 0x79D2, 0x524D] , "MajiKoi5-783gue6qz075azm5e" , "P" ), ( [0x30D1, 0x30D5, 0x30A3, 0x30FC, 0x0064, 0x0065, 0x30EB, 0x30F3, 0x30D0] , "de-jg4avhby1noc0d" , "Q" ), ( [0x305D, 0x306E, 0x30B9, 0x30D4, 0x30FC, 0x30C9, 0x3067] , "d9juau41awczczp" , "R" ), ( [0x002D, 0x003E, 0x0020, 0x0024, 0x0031, 0x002E, 0x0030, 0x0030, 0x0020, 0x003C, 0x002D] , "-> $1.00 <--" , "S" ) ] hunittests = TestList [encodeTests, decodeTests] where encodeTests = TestList $ map f tests where f (decoded, encoded, testname) = TestCase (assertEqual testname (BS.pack $ map (fromIntegral . ord . toLower) encoded) (encode $ T.pack $ map (toLower . chr) decoded)) decodeTests = TestList $ map f tests where f (decoded, encoded, testname) = TestCase (assertEqual testname (Right $ T.pack $ map (toLower . chr) decoded) (decode $ BS.pack $ map (fromIntegral . ord . toLower) encoded)) -- Work around the fact that there is no Arbitrary instance for Text inverseTest :: String -> Bool inverseTest s | all isAscii s = True | otherwise = case decoded of Left _ -> False Right x -> x == packed where packed = T.pack s decoded = decode $ encode packed matchesEncodingDecodeTest :: [Word8] -> Bool matchesEncodingDecodeTest = helper . BS.pack where helper s = helper1 (decode s) (E.decodeStrictByteStringExplicit EB.punycode s) where helper1 (Right m) (Right t) = m == T.pack t helper1 (Left _) (Left _) = True helper1 _ _ = False matchesEncodingEncodeTest :: String -> Bool matchesEncodingEncodeTest s = helper (encode $ T.pack s) (E.encodeStrictByteStringExplicit EB.punycode s) where helper m (Right t) = m == t helper _ _ = False internalStringIsNeverTooShort :: [Word8] -> Bool internalStringIsNeverTooShort s = case decode $ BS.pack s of Left InternalStringTooShort -> False _ -> True main :: IO () main = do result1 <- quickCheckWithResult (stdArgs {maxSuccess = 1000000, maxSize = 100}) inverseTest result2 <- quickCheckWithResult (stdArgs {maxSuccess = 1000000, maxSize = 100}) matchesEncodingDecodeTest result3 <- quickCheckWithResult (stdArgs {maxSuccess = 1000000, maxSize = 100}) matchesEncodingEncodeTest result4 <- quickCheckWithResult (stdArgs {maxSuccess = 1000000, maxSize = 100}) internalStringIsNeverTooShort counts <- runTestTT hunittests case (errors counts, failures counts, result1, result2, result3, result4) of (0, 0, Success {}, Success {}, Success {}, Success {}) -> exitSuccess _ -> exitFailure punycode-2.0/Data/Text/000755 000765 000024 00000000000 12072104303 015762 5ustar00litherumstaff000000 000000 punycode-2.0/Data/Text/Punycode/000755 000765 000024 00000000000 12072104303 017550 5ustar00litherumstaff000000 000000 punycode-2.0/Data/Text/Punycode.hs000644 000765 000024 00000000302 12072104303 020077 0ustar00litherumstaff000000 000000 module Data.Text.Punycode (encode, PunycodeDecodeException (..), decode) where import Data.Text.Punycode.Encode (encode) import Data.Text.Punycode.Decode (PunycodeDecodeException (..), decode) punycode-2.0/Data/Text/Punycode/Decode.hs000644 000765 000024 00000007141 12072104303 021272 0ustar00litherumstaff000000 000000 {-# LANGUAGE DeriveDataTypeable #-} module Data.Text.Punycode.Decode (PunycodeDecodeException (..), decode) where import Control.Exception.Base import qualified Data.ByteString as BS import Data.Char import Data.Serialize hiding (decode) import qualified Data.Text as T import Data.Typeable import Data.Word import Data.Text.Punycode.Shared data PunycodeDecodeException = GenericDecodeException | InternalStringTooShort | InputTooShort | RightOfHyphenShouldBeAlphanumeric | LeftOfHyphenShouldBeBasic | CantStartWithDash | InvalidCodePoint deriving (Eq,Show,Typeable) instance Exception PunycodeDecodeException -- | Decode a string into its unicode form decode :: BS.ByteString -> Either PunycodeDecodeException T.Text decode input | input == BS.pack [45, 45] = Right $ T.pack "-" | not (BS.null input) && BS.length (BS.filter (== 45) input) == 1 && BS.head input == 45 = Left CantStartWithDash | T.any (not . isExtendedBasic) before = Left LeftOfHyphenShouldBeBasic | otherwise = case runGet (inner2 initial_n 0 initial_bias before) after of Right out -> out Left _ -> Left InputTooShort where (before, after) | BS.any f input = (T.pack $ map (chr . fromIntegral) $ BS.unpack $ BS.init b1, a1) | otherwise = (T.empty, input) f = (== (fromIntegral $ ord '-')) (b1, a1) = BS.breakEnd f input inner2 :: Int -> Int -> Int -> T.Text -> Get (Either PunycodeDecodeException T.Text) inner2 n oldi bias output = do b <- isEmpty helper b where helper False = do i <- inner base 1 oldi bias helper' i where helper' Nothing = return $ Left RightOfHyphenShouldBeAlphanumeric helper' (Just i) = case output' of Right output'' -> inner2 n' (i' + 1) bias' output'' Left err -> return $ Left err where bias' = adapt (i - oldi) (T.length output + 1) (oldi == 0) n' = n + i `div` (T.length output + 1) i' = i `mod` (T.length output + 1) output' = insertInto output n' i' helper True = return $ Right output inner :: Int -> Int -> Int -> Int -> Get (Maybe Int) inner k w i bias = do word8 <- getWord8 helper $ word8ToDigit word8 where helper Nothing = return Nothing helper (Just digit) | digit < t = return $ Just i' | otherwise = inner (k + base) w' i' bias where w' = w * (base - t) i' = i + digit * w t | k <= bias + tmin = tmin | k >= bias + tmax = tmax | otherwise = k - bias insertInto :: T.Text -> Int -> Int -> Either PunycodeDecodeException T.Text insertInto input n i | T.length input < i = Left InternalStringTooShort | otherwise = case n' of Just n'' -> Right $ T.concat [T.take i input, T.singleton n'', T.drop i input] Nothing -> Left InvalidCodePoint where n' = safeChr n safeChr :: Int -> Maybe Char safeChr x | x >= 0 && x <= fromEnum (maxBound :: Char) = Just $ chr x | otherwise = Nothing word8ToDigit :: Word8 -> Maybe Int word8ToDigit = helper . fromIntegral where helper word8 | word8 >= ord 'a' && word8 <= ord 'z' = Just $ word8 - (ord 'a') | word8 >= ord 'A' && word8 <= ord 'Z' = Just $ word8 - (ord 'A') | word8 >= ord '0' && word8 <= ord '9' = Just $ 26 + word8 - (ord '0') | otherwise = Nothing isExtendedBasic :: Char -> Bool isExtendedBasic x | isBasic x = True | ord x == 128 = True | otherwise = False punycode-2.0/Data/Text/Punycode/Encode.hs000644 000765 000024 00000006121 12072104303 021301 0ustar00litherumstaff000000 000000 {-# LANGUAGE FlexibleContexts #-} module Data.Text.Punycode.Encode (encode) where import Control.Monad.State hiding (state) import Control.Monad.Writer import qualified Data.ByteString as BS import Data.Char import qualified Data.Text as T import qualified Data.Text.Encoding as TE import Data.Word import Data.Text.Punycode.Shared data PunycodeState = PunycodeState { n :: Int , delta :: Int , bias :: Int , h :: Int } -- | Encode a string into its ascii form encode :: T.Text -> BS.ByteString encode = execWriter . initialWriter initialWriter :: MonadWriter BS.ByteString m => T.Text -> m () initialWriter input = do tell basics when (b > 0) $ tell $ BS.singleton $ fromIntegral $ ord '-' evalStateT (inner3 (map ord $ T.unpack input) b) $ PunycodeState { n = initial_n , delta = 0 , bias = initial_bias , h = b } where basics = TE.encodeUtf8 $ T.filter isBasic input b = BS.length basics inner3 :: (MonadState PunycodeState m, MonadWriter BS.ByteString m) => [Int] -> Int -> m () inner3 input b = do state <- get helper state where helper state | h' < length input = do put $ state {n = m, delta = delta'} mapM_ (inner2 b) input state' <- get put $ state' {delta = (delta state') + 1, n = (n state') + 1} inner3 input b | otherwise = return () where m = minimum $ filter (>= n') input n' = n state h' = h state delta' = (delta state) + (m - n') * (h' + 1) inner2 :: (MonadState PunycodeState m, MonadWriter BS.ByteString m) => Int -> Int -> m () inner2 b c = do state <- get helper state where helper state | c == n' = do q <- inner delta' base bias' tell $ BS.singleton $ baseToAscii q put $ state {bias = adapt delta' (h' + 1) (h' == b), delta = 0, h = (h state) + 1} | otherwise = put $ state {delta = delta'} where delta' = (delta state) + d where d | c < n' = 1 | otherwise = 0 n' = n state bias' = bias state h' = h state inner :: (MonadWriter BS.ByteString m) => Int -> Int -> Int -> m Int inner q k bias' | q < t = return q | otherwise = do tell $ BS.singleton $ baseToAscii $ t + ((q - t) `mod` (base - t)) inner ((q - t) `div` (base - t)) (k + base) bias' where t | k <= bias' + tmin = tmin | k >= bias' + tmax = tmax | otherwise = k - bias' baseToAscii :: Int -> Word8 baseToAscii i | i < 26 = fromIntegral $ i + (ord 'a') | otherwise = fromIntegral $ (i - 26) + (ord '0') punycode-2.0/Data/Text/Punycode/Shared.hs000644 000765 000024 00000001373 12072104303 021316 0ustar00litherumstaff000000 000000 module Data.Text.Punycode.Shared where import Data.Char (ord) base :: Int base = 36 tmin :: Int tmin = 1 tmax :: Int tmax = 26 skew :: Int skew = 38 damp :: Int damp = 700 initial_bias :: Int initial_bias = 72 initial_n :: Int initial_n = 128 adapt :: Int -> Int -> Bool -> Int adapt delta numpoints firsttime = helper where helper = loop 0 $ delta' + (delta' `div` numpoints) where delta' | firsttime = delta `div` damp | otherwise = delta `div` 2 loop k delta' | delta' > ((base - tmin) * tmax) `div` 2 = loop (k + base) $ delta' `div` (base - tmin) | otherwise = k + (((base - tmin + 1) * delta') `div` (delta' + skew)) isBasic :: Char -> Bool isBasic = (< initial_n) . ord