steuermann/nodes.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328

'''
Stuff related to the tree structure of the command set
'''

import fnmatch
#import exyapps.runtime


#####
#####

class command_tree(object):

    # a dict that maps currently known node names to node objects
    node_index = None

    # This is a list if (before, after) showing names of node orders;
    # the individual nodes do not store this information while the
    # parsing is happening.
    node_order = None

    # call init() once before parsing, then call parse for each file, then call finish()
    def __init__( self ) :
        self.node_index = { }
        self.node_order = [ ]

    def finish( self ) :
        # This links up the nodes according to the (before, after) information
        # in node_order.  before and after are fully qualified node names.
        # We do this at the end so that 
        #  - we can have forward references
        #  - we can use wild cards in our AFTER clauses

        # before is the predecessor, after comes AFTER the predecessor
        for before, after, required, pos in self.node_order :
            if ( '*' in before ) or ( '?' in before ) or ( '[' in before ):
                host, table, cmd = crack_name(before)
                for x in self.node_index :
                    hl, tl, cl = crack_name(x)
                    if ( fnmatch.fnmatchcase(hl,host ) and 
                         fnmatch.fnmatchcase(tl,table) and 
                         fnmatch.fnmatchcase(cl,cmd  ) ) :
                        # yes, the wild card matches this one; connect them
                        # (but don't let a node come before itself because the wild card is too loose)
                        if x != after :
                            self.connect(x, after, required, pos)
            else :
                self.connect(before, after, required, pos)

        # Work out the depths of each node.
        # Since we are walking the graph, we also detect dependency loops here.
        compute_depths( self.node_index )
        

    # make the actual connection between nodes
    def connect( self, before, after, required, line ) :

        if not before in self.node_index :
            if required :
                print "error: %s happens after non-existant %s - line %s"%(before,after,line)
            return

        if not after in self.node_index :
            print "error: after node %s does not exist %s"%(after,line)
            return

        # tell the after node that the other one comes first

        self.node_index[after].predecessors.append(self.node_index[before])

        # tell the after node about the before node and note that
        # the before node is not done yet.
        self.node_index[after].released[before] = False


    # create a set of nodes for a particular command - called from within the parser
    def add_command_list( self, table, hostlist, command_list ) :
        for host in hostlist :
            this_table = '%s:%s' % ( host, table )
            for command, script, script_type, after_clause, pos in command_list :
                # this happens once for each CMD clause
                # command is the name of this command
                # script is the script to run
                # after is a list of AFTER clauses
                # pos is where in this file this command was defined

                command = normalize_name( host, table, command )

                if command in self.node_index :
                    # bug: should be error
                    print "# warning: %s already used on line %s"%(command,self.node_index[command].input_line)

                # create the node
                self.node_index[command]=node(command, script, script_type, nice_pos( current_file_name, pos)  )

                for before_name, required, pos in after_clause :
                    # this happens once for each AFTER clause
                    # before is the name of a predecessor that this one comes after
                    # required is a boolean, whether the predecessor must exist
                    # pos is where in the file the AFTER clause begins

                    before_name = normalize_name( host, table, before_name )

                    # list of ( before, after, required, pos )
                    self.node_order.append( (before_name, command, required, pos) )


#####

# crack open host:table/cmd
def crack_name(name) :
    if ':' in name :
        t = name.split(':')
        host = t[0]
        name = t[1]
    else :
        host = '*'

    if '/' in name :
        t = name.split('/')
        table = t[0]
        cmd = t[1]
    else :
        table = '*'
        cmd = name

    return (host, table, cmd)

#####

def join_name( host, table, cmd ) :
    return '%s:%s/%s'%(host,table,cmd)

#####

def normalize_name( host, table, name ) :

    if not '/' in name :
        name = table + '/' + name

    if name.startswith('.:') :
        name = name[2:]

    if not ':' in name :
        name = host + ':' + name
    # print "## return %s"%name
    return name

#####


_wildcard_cache = ( None, None )

def wildcard_name( wild, name ) :
    global _wildcard_cache

    if wild != _wildcard_cache :
        host, table, cmd = crack_name(wild)
        _wildcard_cache = ( name, ( host, table, cmd ) )

    host, table, cmd = _wildcard_cache[1]

    hl, tl, cl = crack_name(name)

    return ( fnmatch.fnmatchcase(hl,host ) and
         fnmatch.fnmatchcase(tl,table) and
         fnmatch.fnmatchcase(cl,cmd  ) )


#####


#####

    
# a node object for each command instance that will be run.  The name is
# "host:table/command".  The shell command to execute on the target host
# is "script".
#
# predecessors[] is a list of everything that this node must definitely come after.
#
# released is a dict indexed by each of the "before" nodes; the value is
# true if the before node is finished running.
#
class node(object) :
    def __init__(self, name, script, script_type, input_line) :
        # the fully qualified name of the node
        self.name = name

        # the command script that this node runs
        self.script = script
        self.script_type = script_type

        # what line of the input file specified this node; this is
        # a string of the form "foo.bar 123"
        self.input_line = input_line

        # crack open host:table/cmd
        self.host, self.table, self.cmd = crack_name(name)

        # this command runs after every node in this list
        self.predecessors = [ ]

        # this is a dict of everything that comes before us
        # The key is the name of the other node.
        # The value is true/false for whether the other node
        # is finished running.
        self.released = { }

        # These flags are 1 or 0 so we can sum() them
        self.finished = 0
        self.running = 0

#####

# debug - make a string representation of all the nodes

def show_nodes( node_index ) :
    import cStringIO as StringIO
    s = StringIO.StringIO()
    for x in sorted( [ x for x in node_index ] ) :    
        x = node_index[x]

        # show the node name and the command it runs
        s.write( "NODE  %s  %s  %s\n"%(x.name, x.script, x.input_line) )

        # show each node that this one comes After, and show the flag
        # whether the previous node has run yet.
        for y in x.predecessors :
            s.write( "        AFTER %s\n"%y.name )

    return s.getvalue()

#####

def nice_pos( filename, yapps_pos ) :
    # filename is the file name to use
    # yapps_pos is a tuple of (file, line, col) except that file
    # is a file-like object with no traceability to an actual file1
    return "%s:%s col %s"%(filename, yapps_pos[1], yapps_pos[2])

#####


#####
#
# calculating depths of a node tree

def c_d_fn(x,depth) :

    if x.in_recursion :
        print "error: loop detected at",x.name
        return

    # if it is already deeper than where we are now, we can (must)
    # prune the tree walk here.
    if x.depth  >= depth :
        return

    if depth > 100 :
        # bug: proxy for somebody wrote a loop
        print "error: depth > 100, node = ",x.name
        return

    x.in_recursion = 1

    # assign the depth
    x.depth = depth

    # make all the successors one deeper
    depth = depth + 1
    for y in x.successors :
        c_d_fn(y,depth)

    x.in_recursion = 0

def compute_depths(nodes) :

    # init everything
    for x in nodes :
        x = nodes[x]
        x.depth = 0
        x.successors = [ ]
        x.in_recursion = 0

    # walk the nodes in an arbitrary order; make a list of successors
    for x in nodes :
        x = nodes[x]
        for y in x.predecessors :
            if not x in y.successors :
                y.successors.append(x)

    # recursively walk down the tree assigning depth values, starting
    # with depth=1 for the highest level
    for x in nodes :
        c_d_fn(nodes[x],1)

    #
    for x in nodes :
        x = nodes[x]
        del x.in_recursion


#####

import specfile

current_file_name = None

def read_file_list( file_list ) :
    global current_file_name
    di = command_tree( ) 
    for x in file_list :
        current_file_name = x
        sc = specfile.specfileScanner( open(x,'r').read() )
        p = specfile.specfile( scanner=sc, data=di )
        result = specfile.wrap_error_reporter( p, 'start' )
    di.finish()
    return di

if __name__=='__main__':
    import sys
    n = read_file_list( sys.argv[1:] )
    print show_nodes(n.node_index)