Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

from ._compat import ord_byte 

 

import math 

import string 

from logging import getLogger 

 

_logger = getLogger('pwm.encoding')

# Named alphabets that can be selected instead of spelling out a custom one.
#
# 'full' lists the digits twice to raise the probability of a digit showing
# up in a default 16 character password, for sites that are bad at estimating
# entropy and insist on digits being present.
PRESETS = dict(
    full=string.ascii_letters + string.digits * 2 + '!#$%&()*+,-./:;=?@[]^_|~',
    alpha=string.ascii_letters,
    numeric=string.digits,
    alphanumeric=string.ascii_letters + string.digits,
)

 

def ceildiv(dividend, divisor):
    '''
    integer ceiling division

    Returns the smallest integer that is greater than or equal to
    dividend / divisor. Works for any combination of signs.

    Raises ZeroDivisionError when divisor is zero.
    '''
    # Floor division rounds towards negative infinity. Negating the dividend
    # before and the quotient after turns that floor into a ceiling, whatever
    # the sign of the divisor. (The "+ divisor - 1" trick is only valid for
    # positive divisors.)
    return -(-dividend // divisor)

 

def calc_chunklen(alph_len):
    '''
    computes the ideal conversion ratio for the given alphabet.

    Input chunk sizes of 1 to 6 bytes are tried. For each one the
    (fractional) number of alphabet symbols needed to hold that many bits is
    computed, and the size whose symbol count has the smallest fractional
    part wins; on a tie the smallest chunk size is kept.

    Returns a tuple (binlen, enclen): the number of input bytes per chunk and
    the number of output symbols per chunk (truncated to an integer).
    '''
    bits_per_symbol = math.log(alph_len, 2)

    best_waste = None
    binlen = None
    enclen = None
    for nbytes in range(1, 7):
        nsymbols = nbytes * 8 / bits_per_symbol
        waste = nsymbols % 1
        # strict comparison: the first candidate with the minimal waste wins
        if best_waste is None or waste < best_waste:
            best_waste = waste
            binlen = nbytes
            enclen = nsymbols

    return binlen, int(enclen)

 

 

class Encoder(object):
    '''
    general-purpose encoder. Encodes arbitrary binary data with a given
    specific base ("alphabet").

    self.chunklen is the pair returned by calc_chunklen: index 0 is the
    number of input bytes per chunk, index 1 the number of output symbols
    produced for each chunk.
    '''

    def __init__(self, alphabet):
        self.alphabet = alphabet
        self.chunklen = calc_chunklen(len(alphabet))

    def encode(self, digest, total_len):
        '''
        encodes digest chunk by chunk and returns at most total_len symbols

        The digest is right-padded with NUL bytes up to a whole number of
        chunks before encoding.
        '''
        bytes_per_chunk = self.chunklen[0]
        nchunks = ceildiv(len(digest), bytes_per_chunk)
        padded = digest.ljust(nchunks * bytes_per_chunk, b'\0')

        pieces = []
        for index in range(nchunks):
            pieces.append(self._encode_chunk(padded, index))
        return ''.join(pieces)[:total_len]

    def _encode_chunk(self, data, index):
        '''
        picks the chunk at the given index out of the input data, reads it as
        a number and encodes that number
        '''
        as_number = self._chunk_to_long(self._get_chunk(data, index))
        return self._encode_long(as_number)

    def _encode_long(self, val):
        '''
        encodes an integer of 8*self.chunklen[0] bits using the specified
        alphabet; always emits self.chunklen[1] symbols, most significant
        first
        '''
        base = len(self.alphabet)
        symbols = []
        for power in range(self.chunklen[1] - 1, -1, -1):
            symbols.append(self.alphabet[(val // base ** power) % base])
        return ''.join(symbols)

    def _chunk_to_long(self, chunk):
        '''
        parses a chunk of bytes to integer using big-endian representation
        '''
        nbytes = self.chunklen[0]
        total = 0
        for position in range(nbytes):
            total += 256 ** (nbytes - 1 - position) * ord_byte(chunk[position])
        return total

    def _get_chunk(self, data, index):
        '''
        partition the data into chunks and retrieve the chunk at the given index
        '''
        size = self.chunklen[0]
        start = index * size
        return data[start:start + size]

 

 

def lookup_alphabet(charset):
    '''
    retrieves a named charset or treats the input as a custom alphabet and
    uses that

    If charset is a key of PRESETS the corresponding preset alphabet is
    returned. Any other value is returned unchanged as a custom alphabet; a
    warning is logged when it has fewer than 16 symbols, since that may be a
    mistyped preset name rather than an intended alphabet.
    '''
    if charset in PRESETS:
        return PRESETS[charset]
    if len(charset) < 16:
        _logger.warning('very small alphabet in use, possibly a failed lookup?')
    return charset