Context Navigation

source: trunk/third/gcc/unroll.c @ 8834

Visit:

Revision 8834, 116.5 KB checked in by ghudson, 28 years ago (diff)
This commit was generated by cvs2svn to compensate for changes in r8833, which included commits to RCS files with non-trunk default branches.

Line
1	/* Try to unroll loops, and split induction variables.
2	Copyright (C) 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
3	Contributed by James E. Wilson, Cygnus Support/UC Berkeley.
4
5	This file is part of GNU CC.
6
7	GNU CC is free software; you can redistribute it and/or modify
8	it under the terms of the GNU General Public License as published by
9	the Free Software Foundation; either version 2, or (at your option)
10	any later version.
11
12	GNU CC is distributed in the hope that it will be useful,
13	but WITHOUT ANY WARRANTY; without even the implied warranty of
14	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15	GNU General Public License for more details.
16
17	You should have received a copy of the GNU General Public License
18	along with GNU CC; see the file COPYING. If not, write to
19	the Free Software Foundation, 59 Temple Place - Suite 330,
20	Boston, MA 02111-1307, USA. */
21
22	/* Try to unroll a loop, and split induction variables.
23
24	Loops for which the number of iterations can be calculated exactly are
25	handled specially. If the number of iterations times the insn_count is
26	less than MAX_UNROLLED_INSNS, then the loop is unrolled completely.
27	Otherwise, we try to unroll the loop a number of times modulo the number
28	of iterations, so that only one exit test will be needed. It is unrolled
29	a number of times approximately equal to MAX_UNROLLED_INSNS divided by
30	the insn count.
31
32	Otherwise, if the number of iterations can be calculated exactly at
33	run time, and the loop is always entered at the top, then we try to
34	precondition the loop. That is, at run time, calculate how many times
35	the loop will execute, and then execute the loop body a few times so
36	that the remaining iterations will be some multiple of 4 (or 2 if the
37	loop is large). Then fall through to a loop unrolled 4 (or 2) times,
38	with only one exit test needed at the end of the loop.
39
40	Otherwise, if the number of iterations can not be calculated exactly,
41	not even at run time, then we still unroll the loop a number of times
42	approximately equal to MAX_UNROLLED_INSNS divided by the insn count,
43	but there must be an exit test after each copy of the loop body.
44
45	For each induction variable, which is dead outside the loop (replaceable)
46	or for which we can easily calculate the final value, if we can easily
47	calculate its value at each place where it is set as a function of the
48	current loop unroll count and the variable's value at loop entry, then
49	the induction variable is split into `N' different variables, one for
50	each copy of the loop body. One variable is live across the backward
51	branch, and the others are all calculated as a function of this variable.
52	This helps eliminate data dependencies, and leads to further opportunities
53	for cse. */
54
55	/* Possible improvements follow: */
56
57	/* ??? Add an extra pass somewhere to determine whether unrolling will
58	give any benefit. E.g. after generating all unrolled insns, compute the
59	cost of all insns and compare against cost of insns in rolled loop.
60
61	- On traditional architectures, unrolling a non-constant bound loop
62	is a win if there is a giv whose only use is in memory addresses, the
63	memory addresses can be split, and hence giv increments can be
64	eliminated.
65	- It is also a win if the loop is executed many times, and preconditioning
66	can be performed for the loop.
67	Add code to check for these and similar cases. */
68
69	/* ??? Improve control of which loops get unrolled. Could use profiling
70	info to only unroll the most commonly executed loops. Perhaps have
71	a user specifyable option to control the amount of code expansion,
72	or the percent of loops to consider for unrolling. Etc. */
73
74	/* ??? Look at the register copies inside the loop to see if they form a
75	simple permutation. If so, iterate the permutation until it gets back to
76	the start state. This is how many times we should unroll the loop, for
77	best results, because then all register copies can be eliminated.
78	For example, the lisp nreverse function should be unrolled 3 times
79	while (this)
80	{
81	next = this->cdr;
82	this->cdr = prev;
83	prev = this;
84	this = next;
85	}
86
87	??? The number of times to unroll the loop may also be based on data
88	references in the loop. For example, if we have a loop that references
89	x[i-1], x[i], and x[i+1], we should unroll it a multiple of 3 times. */
90
91	/* ??? Add some simple linear equation solving capability so that we can
92	determine the number of loop iterations for more complex loops.
93	For example, consider this loop from gdb
94	#define SWAP_TARGET_AND_HOST(buffer,len)
95	{
96	char tmp;
97	char p = (char ) buffer;
98	char q = ((char ) buffer) + len - 1;
99	int iterations = (len + 1) >> 1;
100	int i;
101	for (p; p < q; p++, q--;)
102	{
103	tmp = *q;
104	q = p;
105	*p = tmp;
106	}
107	}
108	Note that:
109	start value = p = &buffer + current_iteration
110	end value = q = &buffer + len - 1 - current_iteration
111	Given the loop exit test of "p < q", then there must be "q - p" iterations,
112	set equal to zero and solve for number of iterations:
113	q - p = len - 1 - 2*current_iteration = 0
114	current_iteration = (len - 1) / 2
115	Hence, there are (len - 1) / 2 (rounded up to the nearest integer)
116	iterations of this loop. */
117
118	/* ??? Currently, no labels are marked as loop invariant when doing loop
119	unrolling. This is because an insn inside the loop, that loads the address
120	of a label inside the loop into a register, could be moved outside the loop
121	by the invariant code motion pass if labels were invariant. If the loop
122	is subsequently unrolled, the code will be wrong because each unrolled
123	body of the loop will use the same address, whereas each actually needs a
124	different address. A case where this happens is when a loop containing
125	a switch statement is unrolled.
126
127	It would be better to let labels be considered invariant. When we
128	unroll loops here, check to see if any insns using a label local to the
129	loop were moved before the loop. If so, then correct the problem, by
130	moving the insn back into the loop, or perhaps replicate the insn before
131	the loop, one copy for each time the loop is unrolled. */
132
133	/* The prime factors looked for when trying to unroll a loop by some
134	number which is modulo the total number of iterations. Just checking
135	for these 4 prime factors will find at least one factor for 75% of
136	all numbers theoretically. Practically speaking, this will succeed
137	almost all of the time since loops are generally a multiple of 2
138	and/or 5. */
139
140	#define NUM_FACTORS 4
141
142	struct _factor { int factor, count; } factors[NUM_FACTORS]
143	= { {2, 0}, {3, 0}, {5, 0}, {7, 0}};
144
145	/* Describes the different types of loop unrolling performed. */
146
147	enum unroll_types { UNROLL_COMPLETELY, UNROLL_MODULO, UNROLL_NAIVE };
148
149	#include "config.h"
150	#include "rtl.h"
151	#include "insn-config.h"
152	#include "integrate.h"
153	#include "regs.h"
154	#include "flags.h"
155	#include "expr.h"
156	#include <stdio.h>
157	#include "loop.h"
158
159	/* This controls which loops are unrolled, and by how much we unroll
160	them. */
161
162	#ifndef MAX_UNROLLED_INSNS
163	#define MAX_UNROLLED_INSNS 100
164	#endif
165
166	/* Indexed by register number, if non-zero, then it contains a pointer
167	to a struct induction for a DEST_REG giv which has been combined with
168	one of more address givs. This is needed because whenever such a DEST_REG
169	giv is modified, we must modify the value of all split address givs
170	that were combined with this DEST_REG giv. */
171
172	static struct induction **addr_combined_regs;
173
174	/* Indexed by register number, if this is a splittable induction variable,
175	then this will hold the current value of the register, which depends on the
176	iteration number. */
177
178	static rtx *splittable_regs;
179
180	/* Indexed by register number, if this is a splittable induction variable,
181	then this will hold the number of instructions in the loop that modify
182	the induction variable. Used to ensure that only the last insn modifying
183	a split iv will update the original iv of the dest. */
184
185	static int *splittable_regs_updates;
186
187	/* Values describing the current loop's iteration variable. These are set up
188	by loop_iterations, and used by precondition_loop_p. */
189
190	static rtx loop_iteration_var;
191	static rtx loop_initial_value;
192	static rtx loop_increment;
193	static rtx loop_final_value;
194
195	/* Forward declarations. */
196
197	static void init_reg_map PROTO((struct inline_remap *, int));
198	static int precondition_loop_p PROTO((rtx , rtx , rtx *, rtx, rtx));
199	static rtx calculate_giv_inc PROTO((rtx, rtx, int));
200	static rtx initial_reg_note_copy PROTO((rtx, struct inline_remap *));
201	static void final_reg_note_copy PROTO((rtx, struct inline_remap *));
202	static void copy_loop_body PROTO((rtx, rtx, struct inline_remap *, rtx, int,
203	enum unroll_types, rtx, rtx, rtx, rtx));
204	static void iteration_info PROTO((rtx, rtx , rtx , rtx, rtx));
205	static rtx approx_final_value PROTO((enum rtx_code, rtx, int , int ));
206	static int find_splittable_regs PROTO((enum unroll_types, rtx, rtx, rtx, int));
207	static int find_splittable_givs PROTO((struct iv_class *,enum unroll_types,
208	rtx, rtx, rtx, int));
209	static int reg_dead_after_loop PROTO((rtx, rtx, rtx));
210	static rtx fold_rtx_mult_add PROTO((rtx, rtx, rtx, enum machine_mode));
211	static rtx remap_split_bivs PROTO((rtx));
212
213	/* Try to unroll one loop and split induction variables in the loop.
214
215	The loop is described by the arguments LOOP_END, INSN_COUNT, and
216	LOOP_START. END_INSERT_BEFORE indicates where insns should be added
217	which need to be executed when the loop falls through. STRENGTH_REDUCTION_P
218	indicates whether information generated in the strength reduction pass
219	is available.
220
221	This function is intended to be called from within `strength_reduce'
222	in loop.c. */
223
224	void
225	unroll_loop (loop_end, insn_count, loop_start, end_insert_before,
226	strength_reduce_p)
227	rtx loop_end;
228	int insn_count;
229	rtx loop_start;
230	rtx end_insert_before;
231	int strength_reduce_p;
232	{
233	int i, j, temp;
234	int unroll_number = 1;
235	rtx copy_start, copy_end;
236	rtx insn, copy, sequence, pattern, tem;
237	int max_labelno, max_insnno;
238	rtx insert_before;
239	struct inline_remap *map;
240	char *local_label;
241	char *local_regno;
242	int maxregnum;
243	int new_maxregnum;
244	rtx exit_label = 0;
245	rtx start_label;
246	struct iv_class *bl;
247	int splitting_not_safe = 0;
248	enum unroll_types unroll_type;
249	int loop_preconditioned = 0;
250	rtx safety_label;
251	/* This points to the last real insn in the loop, which should be either
252	a JUMP_INSN (for conditional jumps) or a BARRIER (for unconditional
253	jumps). */
254	rtx last_loop_insn;
255
256	/* Don't bother unrolling huge loops. Since the minimum factor is
257	two, loops greater than one half of MAX_UNROLLED_INSNS will never
258	be unrolled. */
259	if (insn_count > MAX_UNROLLED_INSNS / 2)
260	{
261	if (loop_dump_stream)
262	fprintf (loop_dump_stream, "Unrolling failure: Loop too big.\n");
263	return;
264	}
265
266	/* When emitting debugger info, we can't unroll loops with unequal numbers
267	of block_beg and block_end notes, because that would unbalance the block
268	structure of the function. This can happen as a result of the
269	"if (foo) bar; else break;" optimization in jump.c. */
270
271	if (write_symbols != NO_DEBUG)
272	{
273	int block_begins = 0;
274	int block_ends = 0;
275
276	for (insn = loop_start; insn != loop_end; insn = NEXT_INSN (insn))
277	{
278	if (GET_CODE (insn) == NOTE)
279	{
280	if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_BLOCK_BEG)
281	block_begins++;
282	else if (NOTE_LINE_NUMBER (insn) == NOTE_INSN_BLOCK_END)
283	block_ends++;
284	}
285	}
286
287	if (block_begins != block_ends)
288	{
289	if (loop_dump_stream)
290	fprintf (loop_dump_stream,
291	"Unrolling failure: Unbalanced block notes.\n");
292	return;
293	}
294	}
295
296	/* Determine type of unroll to perform. Depends on the number of iterations
297	and the size of the loop. */
298
299	/* If there is no strength reduce info, then set loop_n_iterations to zero.
300	This can happen if strength_reduce can't find any bivs in the loop.
301	A value of zero indicates that the number of iterations could not be
302	calculated. */
303
304	if (! strength_reduce_p)
305	loop_n_iterations = 0;
306
307	if (loop_dump_stream && loop_n_iterations > 0)
308	fprintf (loop_dump_stream,
309	"Loop unrolling: %d iterations.\n", loop_n_iterations);
310
311	/* Find and save a pointer to the last nonnote insn in the loop. */
312
313	last_loop_insn = prev_nonnote_insn (loop_end);
314
315	/* Calculate how many times to unroll the loop. Indicate whether or
316	not the loop is being completely unrolled. */
317
318	if (loop_n_iterations == 1)
319	{
320	/* If number of iterations is exactly 1, then eliminate the compare and
321	branch at the end of the loop since they will never be taken.
322	Then return, since no other action is needed here. */
323
324	/* If the last instruction is not a BARRIER or a JUMP_INSN, then
325	don't do anything. */
326
327	if (GET_CODE (last_loop_insn) == BARRIER)
328	{
329	/* Delete the jump insn. This will delete the barrier also. */
330	delete_insn (PREV_INSN (last_loop_insn));
331	}
332	else if (GET_CODE (last_loop_insn) == JUMP_INSN)
333	{
334	#ifdef HAVE_cc0
335	/* The immediately preceding insn is a compare which must be
336	deleted. */
337	delete_insn (last_loop_insn);
338	delete_insn (PREV_INSN (last_loop_insn));
339	#else
340	/* The immediately preceding insn may not be the compare, so don't
341	delete it. */
342	delete_insn (last_loop_insn);
343	#endif
344	}
345	return;
346	}
347	else if (loop_n_iterations > 0
348	&& loop_n_iterations * insn_count < MAX_UNROLLED_INSNS)
349	{
350	unroll_number = loop_n_iterations;
351	unroll_type = UNROLL_COMPLETELY;
352	}
353	else if (loop_n_iterations > 0)
354	{
355	/* Try to factor the number of iterations. Don't bother with the
356	general case, only using 2, 3, 5, and 7 will get 75% of all
357	numbers theoretically, and almost all in practice. */
358
359	for (i = 0; i < NUM_FACTORS; i++)
360	factors[i].count = 0;
361
362	temp = loop_n_iterations;
363	for (i = NUM_FACTORS - 1; i >= 0; i--)
364	while (temp % factors[i].factor == 0)
365	{
366	factors[i].count++;
367	temp = temp / factors[i].factor;
368	}
369
370	/* Start with the larger factors first so that we generally
371	get lots of unrolling. */
372
373	unroll_number = 1;
374	temp = insn_count;
375	for (i = 3; i >= 0; i--)
376	while (factors[i].count--)
377	{
378	if (temp * factors[i].factor < MAX_UNROLLED_INSNS)
379	{
380	unroll_number *= factors[i].factor;
381	temp *= factors[i].factor;
382	}
383	else
384	break;
385	}
386
387	/* If we couldn't find any factors, then unroll as in the normal
388	case. */
389	if (unroll_number == 1)
390	{
391	if (loop_dump_stream)
392	fprintf (loop_dump_stream,
393	"Loop unrolling: No factors found.\n");
394	}
395	else
396	unroll_type = UNROLL_MODULO;
397	}
398
399
400	/* Default case, calculate number of times to unroll loop based on its
401	size. */
402	if (unroll_number == 1)
403	{
404	if (8 * insn_count < MAX_UNROLLED_INSNS)
405	unroll_number = 8;
406	else if (4 * insn_count < MAX_UNROLLED_INSNS)
407	unroll_number = 4;
408	else
409	unroll_number = 2;
410
411	unroll_type = UNROLL_NAIVE;
412	}
413
414	/* Now we know how many times to unroll the loop. */
415
416	if (loop_dump_stream)
417	fprintf (loop_dump_stream,
418	"Unrolling loop %d times.\n", unroll_number);
419
420
421	if (unroll_type == UNROLL_COMPLETELY \|\| unroll_type == UNROLL_MODULO)
422	{
423	/* Loops of these types should never start with a jump down to
424	the exit condition test. For now, check for this case just to
425	be sure. UNROLL_NAIVE loops can be of this form, this case is
426	handled below. */
427	insn = loop_start;
428	while (GET_CODE (insn) != CODE_LABEL && GET_CODE (insn) != JUMP_INSN)
429	insn = NEXT_INSN (insn);
430	if (GET_CODE (insn) == JUMP_INSN)
431	abort ();
432	}
433
434	if (unroll_type == UNROLL_COMPLETELY)
435	{
436	/* Completely unrolling the loop: Delete the compare and branch at
437	the end (the last two instructions). This delete must done at the
438	very end of loop unrolling, to avoid problems with calls to
439	back_branch_in_range_p, which is called by find_splittable_regs.
440	All increments of splittable bivs/givs are changed to load constant
441	instructions. */
442
443	copy_start = loop_start;
444
445	/* Set insert_before to the instruction immediately after the JUMP_INSN
446	(or BARRIER), so that any NOTEs between the JUMP_INSN and the end of
447	the loop will be correctly handled by copy_loop_body. */
448	insert_before = NEXT_INSN (last_loop_insn);
449
450	/* Set copy_end to the insn before the jump at the end of the loop. */
451	if (GET_CODE (last_loop_insn) == BARRIER)
452	copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
453	else if (GET_CODE (last_loop_insn) == JUMP_INSN)
454	{
455	#ifdef HAVE_cc0
456	/* The instruction immediately before the JUMP_INSN is a compare
457	instruction which we do not want to copy. */
458	copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
459	#else
460	/* The instruction immediately before the JUMP_INSN may not be the
461	compare, so we must copy it. */
462	copy_end = PREV_INSN (last_loop_insn);
463	#endif
464	}
465	else
466	{
467	/* We currently can't unroll a loop if it doesn't end with a
468	JUMP_INSN. There would need to be a mechanism that recognizes
469	this case, and then inserts a jump after each loop body, which
470	jumps to after the last loop body. */
471	if (loop_dump_stream)
472	fprintf (loop_dump_stream,
473	"Unrolling failure: loop does not end with a JUMP_INSN.\n");
474	return;
475	}
476	}
477	else if (unroll_type == UNROLL_MODULO)
478	{
479	/* Partially unrolling the loop: The compare and branch at the end
480	(the last two instructions) must remain. Don't copy the compare
481	and branch instructions at the end of the loop. Insert the unrolled
482	code immediately before the compare/branch at the end so that the
483	code will fall through to them as before. */
484
485	copy_start = loop_start;
486
487	/* Set insert_before to the jump insn at the end of the loop.
488	Set copy_end to before the jump insn at the end of the loop. */
489	if (GET_CODE (last_loop_insn) == BARRIER)
490	{
491	insert_before = PREV_INSN (last_loop_insn);
492	copy_end = PREV_INSN (insert_before);
493	}
494	else if (GET_CODE (last_loop_insn) == JUMP_INSN)
495	{
496	#ifdef HAVE_cc0
497	/* The instruction immediately before the JUMP_INSN is a compare
498	instruction which we do not want to copy or delete. */
499	insert_before = PREV_INSN (last_loop_insn);
500	copy_end = PREV_INSN (insert_before);
501	#else
502	/* The instruction immediately before the JUMP_INSN may not be the
503	compare, so we must copy it. */
504	insert_before = last_loop_insn;
505	copy_end = PREV_INSN (last_loop_insn);
506	#endif
507	}
508	else
509	{
510	/* We currently can't unroll a loop if it doesn't end with a
511	JUMP_INSN. There would need to be a mechanism that recognizes
512	this case, and then inserts a jump after each loop body, which
513	jumps to after the last loop body. */
514	if (loop_dump_stream)
515	fprintf (loop_dump_stream,
516	"Unrolling failure: loop does not end with a JUMP_INSN.\n");
517	return;
518	}
519	}
520	else
521	{
522	/* Normal case: Must copy the compare and branch instructions at the
523	end of the loop. */
524
525	if (GET_CODE (last_loop_insn) == BARRIER)
526	{
527	/* Loop ends with an unconditional jump and a barrier.
528	Handle this like above, don't copy jump and barrier.
529	This is not strictly necessary, but doing so prevents generating
530	unconditional jumps to an immediately following label.
531
532	This will be corrected below if the target of this jump is
533	not the start_label. */
534
535	insert_before = PREV_INSN (last_loop_insn);
536	copy_end = PREV_INSN (insert_before);
537	}
538	else if (GET_CODE (last_loop_insn) == JUMP_INSN)
539	{
540	/* Set insert_before to immediately after the JUMP_INSN, so that
541	NOTEs at the end of the loop will be correctly handled by
542	copy_loop_body. */
543	insert_before = NEXT_INSN (last_loop_insn);
544	copy_end = last_loop_insn;
545	}
546	else
547	{
548	/* We currently can't unroll a loop if it doesn't end with a
549	JUMP_INSN. There would need to be a mechanism that recognizes
550	this case, and then inserts a jump after each loop body, which
551	jumps to after the last loop body. */
552	if (loop_dump_stream)
553	fprintf (loop_dump_stream,
554	"Unrolling failure: loop does not end with a JUMP_INSN.\n");
555	return;
556	}
557
558	/* If copying exit test branches because they can not be eliminated,
559	then must convert the fall through case of the branch to a jump past
560	the end of the loop. Create a label to emit after the loop and save
561	it for later use. Do not use the label after the loop, if any, since
562	it might be used by insns outside the loop, or there might be insns
563	added before it later by final_[bg]iv_value which must be after
564	the real exit label. */
565	exit_label = gen_label_rtx ();
566
567	insn = loop_start;
568	while (GET_CODE (insn) != CODE_LABEL && GET_CODE (insn) != JUMP_INSN)
569	insn = NEXT_INSN (insn);
570
571	if (GET_CODE (insn) == JUMP_INSN)
572	{
573	/* The loop starts with a jump down to the exit condition test.
574	Start copying the loop after the barrier following this
575	jump insn. */
576	copy_start = NEXT_INSN (insn);
577
578	/* Splitting induction variables doesn't work when the loop is
579	entered via a jump to the bottom, because then we end up doing
580	a comparison against a new register for a split variable, but
581	we did not execute the set insn for the new register because
582	it was skipped over. */
583	splitting_not_safe = 1;
584	if (loop_dump_stream)
585	fprintf (loop_dump_stream,
586	"Splitting not safe, because loop not entered at top.\n");
587	}
588	else
589	copy_start = loop_start;
590	}
591
592	/* This should always be the first label in the loop. */
593	start_label = NEXT_INSN (copy_start);
594	/* There may be a line number note and/or a loop continue note here. */
595	while (GET_CODE (start_label) == NOTE)
596	start_label = NEXT_INSN (start_label);
597	if (GET_CODE (start_label) != CODE_LABEL)
598	{
599	/* This can happen as a result of jump threading. If the first insns in
600	the loop test the same condition as the loop's backward jump, or the
601	opposite condition, then the backward jump will be modified to point
602	to elsewhere, and the loop's start label is deleted.
603
604	This case currently can not be handled by the loop unrolling code. */
605
606	if (loop_dump_stream)
607	fprintf (loop_dump_stream,
608	"Unrolling failure: unknown insns between BEG note and loop label.\n");
609	return;
610	}
611	if (LABEL_NAME (start_label))
612	{
613	/* The jump optimization pass must have combined the original start label
614	with a named label for a goto. We can't unroll this case because
615	jumps which go to the named label must be handled differently than
616	jumps to the loop start, and it is impossible to differentiate them
617	in this case. */
618	if (loop_dump_stream)
619	fprintf (loop_dump_stream,
620	"Unrolling failure: loop start label is gone\n");
621	return;
622	}
623
624	if (unroll_type == UNROLL_NAIVE
625	&& GET_CODE (last_loop_insn) == BARRIER
626	&& start_label != JUMP_LABEL (PREV_INSN (last_loop_insn)))
627	{
628	/* In this case, we must copy the jump and barrier, because they will
629	not be converted to jumps to an immediately following label. */
630
631	insert_before = NEXT_INSN (last_loop_insn);
632	copy_end = last_loop_insn;
633	}
634
635	/* Allocate a translation table for the labels and insn numbers.
636	They will be filled in as we copy the insns in the loop. */
637
638	max_labelno = max_label_num ();
639	max_insnno = get_max_uid ();
640
641	map = (struct inline_remap *) alloca (sizeof (struct inline_remap));
642
643	map->integrating = 0;
644
645	/* Allocate the label map. */
646
647	if (max_labelno > 0)
648	{
649	map->label_map = (rtx ) alloca (max_labelno sizeof (rtx));
650
651	local_label = (char *) alloca (max_labelno);
652	bzero (local_label, max_labelno);
653	}
654	else
655	map->label_map = 0;
656
657	/* Search the loop and mark all local labels, i.e. the ones which have to
658	be distinct labels when copied. For all labels which might be
659	non-local, set their label_map entries to point to themselves.
660	If they happen to be local their label_map entries will be overwritten
661	before the loop body is copied. The label_map entries for local labels
662	will be set to a different value each time the loop body is copied. */
663
664	for (insn = copy_start; insn != loop_end; insn = NEXT_INSN (insn))
665	{
666	if (GET_CODE (insn) == CODE_LABEL)
667	local_label[CODE_LABEL_NUMBER (insn)] = 1;
668	else if (GET_CODE (insn) == JUMP_INSN)
669	{
670	if (JUMP_LABEL (insn))
671	map->label_map[CODE_LABEL_NUMBER (JUMP_LABEL (insn))]
672	= JUMP_LABEL (insn);
673	else if (GET_CODE (PATTERN (insn)) == ADDR_VEC
674	\|\| GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)
675	{
676	rtx pat = PATTERN (insn);
677	int diff_vec_p = GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC;
678	int len = XVECLEN (pat, diff_vec_p);
679	rtx label;
680
681	for (i = 0; i < len; i++)
682	{
683	label = XEXP (XVECEXP (pat, diff_vec_p, i), 0);
684	map->label_map[CODE_LABEL_NUMBER (label)] = label;
685	}
686	}
687	}
688	}
689
690	/* Allocate space for the insn map. */
691
692	map->insn_map = (rtx ) alloca (max_insnno sizeof (rtx));
693
694	/* Set this to zero, to indicate that we are doing loop unrolling,
695	not function inlining. */
696	map->inline_target = 0;
697
698	/* The register and constant maps depend on the number of registers
699	present, so the final maps can't be created until after
700	find_splittable_regs is called. However, they are needed for
701	preconditioning, so we create temporary maps when preconditioning
702	is performed. */
703
704	/* The preconditioning code may allocate two new pseudo registers. */
705	maxregnum = max_reg_num ();
706
707	/* Allocate and zero out the splittable_regs and addr_combined_regs
708	arrays. These must be zeroed here because they will be used if
709	loop preconditioning is performed, and must be zero for that case.
710
711	It is safe to do this here, since the extra registers created by the
712	preconditioning code and find_splittable_regs will never be used
713	to access the splittable_regs[] and addr_combined_regs[] arrays. */
714
715	splittable_regs = (rtx ) alloca (maxregnum sizeof (rtx));
716	bzero ((char ) splittable_regs, maxregnum sizeof (rtx));
717	splittable_regs_updates = (int ) alloca (maxregnum sizeof (int));
718	bzero ((char ) splittable_regs_updates, maxregnum sizeof (int));
719	addr_combined_regs
720	= (struct induction *) alloca (maxregnum sizeof (struct induction *));
721	bzero ((char ) addr_combined_regs, maxregnum sizeof (struct induction *));
722	/* We must limit it to max_reg_before_loop, because only these pseudo
723	registers have valid regno_first_uid info. Any register created after
724	that is unlikely to be local to the loop anyways. */
725	local_regno = (char *) alloca (max_reg_before_loop);
726	bzero (local_regno, max_reg_before_loop);
727
728	/* Mark all local registers, i.e. the ones which are referenced only
729	inside the loop. */
730	if (INSN_UID (copy_end) < max_uid_for_loop)
731	{
732	int copy_start_luid = INSN_LUID (copy_start);
733	int copy_end_luid = INSN_LUID (copy_end);
734
735	/* If a register is used in the jump insn, we must not duplicate it
736	since it will also be used outside the loop. */
737	if (GET_CODE (copy_end) == JUMP_INSN)
738	copy_end_luid--;
739	/* If copy_start points to the NOTE that starts the loop, then we must
740	use the next luid, because invariant pseudo-regs moved out of the loop
741	have their lifetimes modified to start here, but they are not safe
742	to duplicate. */
743	if (copy_start == loop_start)
744	copy_start_luid++;
745
746	for (j = FIRST_PSEUDO_REGISTER; j < max_reg_before_loop; ++j)
747	if (regno_first_uid[j] > 0 && regno_first_uid[j] <= max_uid_for_loop
748	&& uid_luid[regno_first_uid[j]] >= copy_start_luid
749	&& regno_last_uid[j] > 0 && regno_last_uid[j] <= max_uid_for_loop
750	&& uid_luid[regno_last_uid[j]] <= copy_end_luid)
751	local_regno[j] = 1;
752	}
753
754	/* If this loop requires exit tests when unrolled, check to see if we
755	can precondition the loop so as to make the exit tests unnecessary.
756	Just like variable splitting, this is not safe if the loop is entered
757	via a jump to the bottom. Also, can not do this if no strength
758	reduce info, because precondition_loop_p uses this info. */
759
760	/* Must copy the loop body for preconditioning before the following
761	find_splittable_regs call since that will emit insns which need to
762	be after the preconditioned loop copies, but immediately before the
763	unrolled loop copies. */
764
765	/* Also, it is not safe to split induction variables for the preconditioned
766	copies of the loop body. If we split induction variables, then the code
767	assumes that each induction variable can be represented as a function
768	of its initial value and the loop iteration number. This is not true
769	in this case, because the last preconditioned copy of the loop body
770	could be any iteration from the first up to the `unroll_number-1'th,
771	depending on the initial value of the iteration variable. Therefore
772	we can not split induction variables here, because we can not calculate
773	their value. Hence, this code must occur before find_splittable_regs
774	is called. */
775
776	if (unroll_type == UNROLL_NAIVE && ! splitting_not_safe && strength_reduce_p)
777	{
778	rtx initial_value, final_value, increment;
779
780	if (precondition_loop_p (&initial_value, &final_value, &increment,
781	loop_start, loop_end))
782	{
783	register rtx diff, temp;
784	enum machine_mode mode;
785	rtx *labels;
786	int abs_inc, neg_inc;
787
788	map->reg_map = (rtx ) alloca (maxregnum sizeof (rtx));
789
790	map->const_equiv_map = (rtx ) alloca (maxregnum sizeof (rtx));
791	map->const_age_map = (unsigned *) alloca (maxregnum
792	* sizeof (unsigned));
793	map->const_equiv_map_size = maxregnum;
794	global_const_equiv_map = map->const_equiv_map;
795	global_const_equiv_map_size = maxregnum;
796
797	init_reg_map (map, maxregnum);
798
799	/* Limit loop unrolling to 4, since this will make 7 copies of
800	the loop body. */
801	if (unroll_number > 4)
802	unroll_number = 4;
803
804	/* Save the absolute value of the increment, and also whether or
805	not it is negative. */
806	neg_inc = 0;
807	abs_inc = INTVAL (increment);
808	if (abs_inc < 0)
809	{
810	abs_inc = - abs_inc;
811	neg_inc = 1;
812	}
813
814	start_sequence ();
815
816	/* Decide what mode to do these calculations in. Choose the larger
817	of final_value's mode and initial_value's mode, or a full-word if
818	both are constants. */
819	mode = GET_MODE (final_value);
820	if (mode == VOIDmode)
821	{
822	mode = GET_MODE (initial_value);
823	if (mode == VOIDmode)
824	mode = word_mode;
825	}
826	else if (mode != GET_MODE (initial_value)
827	&& (GET_MODE_SIZE (mode)
828	< GET_MODE_SIZE (GET_MODE (initial_value))))
829	mode = GET_MODE (initial_value);
830
831	/* Calculate the difference between the final and initial values.
832	Final value may be a (plus (reg x) (const_int 1)) rtx.
833	Let the following cse pass simplify this if initial value is
834	a constant.
835
836	We must copy the final and initial values here to avoid
837	improperly shared rtl. */
838
839	diff = expand_binop (mode, sub_optab, copy_rtx (final_value),
840	copy_rtx (initial_value), NULL_RTX, 0,
841	OPTAB_LIB_WIDEN);
842
843	/* Now calculate (diff % (unroll * abs (increment))) by using an
844	and instruction. */
845	diff = expand_binop (GET_MODE (diff), and_optab, diff,
846	GEN_INT (unroll_number * abs_inc - 1),
847	NULL_RTX, 0, OPTAB_LIB_WIDEN);
848
849	/* Now emit a sequence of branches to jump to the proper precond
850	loop entry point. */
851
852	labels = (rtx ) alloca (sizeof (rtx) unroll_number);
853	for (i = 0; i < unroll_number; i++)
854	labels[i] = gen_label_rtx ();
855
856	/* Check for the case where the initial value is greater than or equal
857	to the final value. In that case, we want to execute exactly
858	one loop iteration. The code below will fail for this case. */
859
860	emit_cmp_insn (initial_value, final_value, neg_inc ? LE : GE,
861	NULL_RTX, mode, 0, 0);
862	if (neg_inc)
863	emit_jump_insn (gen_ble (labels[1]));
864	else
865	emit_jump_insn (gen_bge (labels[1]));
866	JUMP_LABEL (get_last_insn ()) = labels[1];
867	LABEL_NUSES (labels[1])++;
868
869	/* Assuming the unroll_number is 4, and the increment is 2, then
870	for a negative increment: for a positive increment:
871	diff = 0,1 precond 0 diff = 0,7 precond 0
872	diff = 2,3 precond 3 diff = 1,2 precond 1
873	diff = 4,5 precond 2 diff = 3,4 precond 2
874	diff = 6,7 precond 1 diff = 5,6 precond 3 */
875
876	/* We only need to emit (unroll_number - 1) branches here, the
877	last case just falls through to the following code. */
878
879	/* ??? This would give better code if we emitted a tree of branches
880	instead of the current linear list of branches. */
881
882	for (i = 0; i < unroll_number - 1; i++)
883	{
884	int cmp_const;
885	enum rtx_code cmp_code;
886
887	/* For negative increments, must invert the constant compared
888	against, except when comparing against zero. */
889	if (i == 0)
890	{
891	cmp_const = 0;
892	cmp_code = EQ;
893	}
894	else if (neg_inc)
895	{
896	cmp_const = unroll_number - i;
897	cmp_code = GE;
898	}
899	else
900	{
901	cmp_const = i;
902	cmp_code = LE;
903	}
904
905	emit_cmp_insn (diff, GEN_INT (abs_inc * cmp_const),
906	cmp_code, NULL_RTX, mode, 0, 0);
907
908	if (i == 0)
909	emit_jump_insn (gen_beq (labels[i]));
910	else if (neg_inc)
911	emit_jump_insn (gen_bge (labels[i]));
912	else
913	emit_jump_insn (gen_ble (labels[i]));
914	JUMP_LABEL (get_last_insn ()) = labels[i];
915	LABEL_NUSES (labels[i])++;
916	}
917
918	/* If the increment is greater than one, then we need another branch,
919	to handle other cases equivalent to 0. */
920
921	/* ??? This should be merged into the code above somehow to help
922	simplify the code here, and reduce the number of branches emitted.
923	For the negative increment case, the branch here could easily
924	be merged with the `0' case branch above. For the positive
925	increment case, it is not clear how this can be simplified. */
926
927	if (abs_inc != 1)
928	{
929	int cmp_const;
930	enum rtx_code cmp_code;
931
932	if (neg_inc)
933	{
934	cmp_const = abs_inc - 1;
935	cmp_code = LE;
936	}
937	else
938	{
939	cmp_const = abs_inc * (unroll_number - 1) + 1;
940	cmp_code = GE;
941	}
942
943	emit_cmp_insn (diff, GEN_INT (cmp_const), cmp_code, NULL_RTX,
944	mode, 0, 0);
945
946	if (neg_inc)
947	emit_jump_insn (gen_ble (labels[0]));
948	else
949	emit_jump_insn (gen_bge (labels[0]));
950	JUMP_LABEL (get_last_insn ()) = labels[0];
951	LABEL_NUSES (labels[0])++;
952	}
953
954	sequence = gen_sequence ();
955	end_sequence ();
956	emit_insn_before (sequence, loop_start);
957
958	/* Only the last copy of the loop body here needs the exit
959	test, so set copy_end to exclude the compare/branch here,
960	and then reset it inside the loop when get to the last
961	copy. */
962
963	if (GET_CODE (last_loop_insn) == BARRIER)
964	copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
965	else if (GET_CODE (last_loop_insn) == JUMP_INSN)
966	{
967	#ifdef HAVE_cc0
968	/* The immediately preceding insn is a compare which we do not
969	want to copy. */
970	copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
971	#else
972	/* The immediately preceding insn may not be a compare, so we
973	must copy it. */
974	copy_end = PREV_INSN (last_loop_insn);
975	#endif
976	}
977	else
978	abort ();
979
980	for (i = 1; i < unroll_number; i++)
981	{
982	emit_label_after (labels[unroll_number - i],
983	PREV_INSN (loop_start));
984
985	bzero ((char ) map->insn_map, max_insnno sizeof (rtx));
986	bzero ((char ) map->const_equiv_map, maxregnum sizeof (rtx));
987	bzero ((char *) map->const_age_map,
988	maxregnum * sizeof (unsigned));
989	map->const_age = 0;
990
991	for (j = 0; j < max_labelno; j++)
992	if (local_label[j])
993	map->label_map[j] = gen_label_rtx ();
994
995	for (j = FIRST_PSEUDO_REGISTER; j < max_reg_before_loop; j++)
996	if (local_regno[j])
997	map->reg_map[j] = gen_reg_rtx (GET_MODE (regno_reg_rtx[j]));
998
999	/* The last copy needs the compare/branch insns at the end,
1000	so reset copy_end here if the loop ends with a conditional
1001	branch. */
1002
1003	if (i == unroll_number - 1)
1004	{
1005	if (GET_CODE (last_loop_insn) == BARRIER)
1006	copy_end = PREV_INSN (PREV_INSN (last_loop_insn));
1007	else
1008	copy_end = last_loop_insn;
1009	}
1010
1011	/* None of the copies are the `last_iteration', so just
1012	pass zero for that parameter. */
1013	copy_loop_body (copy_start, copy_end, map, exit_label, 0,
1014	unroll_type, start_label, loop_end,
1015	loop_start, copy_end);
1016	}
1017	emit_label_after (labels[0], PREV_INSN (loop_start));
1018
1019	if (GET_CODE (last_loop_insn) == BARRIER)
1020	{
1021	insert_before = PREV_INSN (last_loop_insn);
1022	copy_end = PREV_INSN (insert_before);
1023	}
1024	else
1025	{
1026	#ifdef HAVE_cc0
1027	/* The immediately preceding insn is a compare which we do not
1028	want to copy. */
1029	insert_before = PREV_INSN (last_loop_insn);
1030	copy_end = PREV_INSN (insert_before);
1031	#else
1032	/* The immediately preceding insn may not be a compare, so we
1033	must copy it. */
1034	insert_before = last_loop_insn;
1035	copy_end = PREV_INSN (last_loop_insn);
1036	#endif
1037	}
1038
1039	/* Set unroll type to MODULO now. */
1040	unroll_type = UNROLL_MODULO;
1041	loop_preconditioned = 1;
1042	}
1043	}
1044
1045	/* If reach here, and the loop type is UNROLL_NAIVE, then don't unroll
1046	the loop unless all loops are being unrolled. */
1047	if (unroll_type == UNROLL_NAIVE && ! flag_unroll_all_loops)
1048	{
1049	if (loop_dump_stream)
1050	fprintf (loop_dump_stream, "Unrolling failure: Naive unrolling not being done.\n");
1051	return;
1052	}
1053
1054	/* At this point, we are guaranteed to unroll the loop. */
1055
1056	/* For each biv and giv, determine whether it can be safely split into
1057	a different variable for each unrolled copy of the loop body.
1058	We precalculate and save this info here, since computing it is
1059	expensive.
1060
1061	Do this before deleting any instructions from the loop, so that
1062	back_branch_in_range_p will work correctly. */
1063
1064	if (splitting_not_safe)
1065	temp = 0;
1066	else
1067	temp = find_splittable_regs (unroll_type, loop_start, loop_end,
1068	end_insert_before, unroll_number);
1069
1070	/* find_splittable_regs may have created some new registers, so must
1071	reallocate the reg_map with the new larger size, and must realloc
1072	the constant maps also. */
1073
1074	maxregnum = max_reg_num ();
1075	map->reg_map = (rtx ) alloca (maxregnum sizeof (rtx));
1076
1077	init_reg_map (map, maxregnum);
1078
1079	/* Space is needed in some of the map for new registers, so new_maxregnum
1080	is an (over)estimate of how many registers will exist at the end. */
1081	new_maxregnum = maxregnum + (temp * unroll_number * 2);
1082
1083	/* Must realloc space for the constant maps, because the number of registers
1084	may have changed. */
1085
1086	map->const_equiv_map = (rtx ) alloca (new_maxregnum sizeof (rtx));
1087	map->const_age_map = (unsigned ) alloca (new_maxregnum sizeof (unsigned));
1088
1089	map->const_equiv_map_size = new_maxregnum;
1090	global_const_equiv_map = map->const_equiv_map;
1091	global_const_equiv_map_size = new_maxregnum;
1092
1093	/* Search the list of bivs and givs to find ones which need to be remapped
1094	when split, and set their reg_map entry appropriately. */
1095
1096	for (bl = loop_iv_list; bl; bl = bl->next)
1097	{
1098	if (REGNO (bl->biv->src_reg) != bl->regno)
1099	map->reg_map[bl->regno] = bl->biv->src_reg;
1100	#if 0
1101	/* Currently, non-reduced/final-value givs are never split. */
1102	for (v = bl->giv; v; v = v->next_iv)
1103	if (REGNO (v->src_reg) != bl->regno)
1104	map->reg_map[REGNO (v->dest_reg)] = v->src_reg;
1105	#endif
1106	}
1107
1108	/* If the loop is being partially unrolled, and the iteration variables
1109	are being split, and are being renamed for the split, then must fix up
1110	the compare/jump instruction at the end of the loop to refer to the new
1111	registers. This compare isn't copied, so the registers used in it
1112	will never be replaced if it isn't done here. */
1113
1114	if (unroll_type == UNROLL_MODULO)
1115	{
1116	insn = NEXT_INSN (copy_end);
1117	if (GET_CODE (insn) == INSN \|\| GET_CODE (insn) == JUMP_INSN)
1118	PATTERN (insn) = remap_split_bivs (PATTERN (insn));
1119	}
1120
1121	/* For unroll_number - 1 times, make a copy of each instruction
1122	between copy_start and copy_end, and insert these new instructions
1123	before the end of the loop. */
1124
1125	for (i = 0; i < unroll_number; i++)
1126	{
1127	bzero ((char ) map->insn_map, max_insnno sizeof (rtx));
1128	bzero ((char ) map->const_equiv_map, new_maxregnum sizeof (rtx));
1129	bzero ((char ) map->const_age_map, new_maxregnum sizeof (unsigned));
1130	map->const_age = 0;
1131
1132	for (j = 0; j < max_labelno; j++)
1133	if (local_label[j])
1134	map->label_map[j] = gen_label_rtx ();
1135
1136	for (j = FIRST_PSEUDO_REGISTER; j < max_reg_before_loop; j++)
1137	if (local_regno[j])
1138	map->reg_map[j] = gen_reg_rtx (GET_MODE (regno_reg_rtx[j]));
1139
1140	/* If loop starts with a branch to the test, then fix it so that
1141	it points to the test of the first unrolled copy of the loop. */
1142	if (i == 0 && loop_start != copy_start)
1143	{
1144	insn = PREV_INSN (copy_start);
1145	pattern = PATTERN (insn);
1146
1147	tem = map->label_map[CODE_LABEL_NUMBER
1148	(XEXP (SET_SRC (pattern), 0))];
1149	SET_SRC (pattern) = gen_rtx (LABEL_REF, VOIDmode, tem);
1150
1151	/* Set the jump label so that it can be used by later loop unrolling
1152	passes. */
1153	JUMP_LABEL (insn) = tem;
1154	LABEL_NUSES (tem)++;
1155	}
1156
1157	copy_loop_body (copy_start, copy_end, map, exit_label,
1158	i == unroll_number - 1, unroll_type, start_label,
1159	loop_end, insert_before, insert_before);
1160	}
1161
1162	/* Before deleting any insns, emit a CODE_LABEL immediately after the last
1163	insn to be deleted. This prevents any runaway delete_insn call from
1164	more insns that it should, as it always stops at a CODE_LABEL. */
1165
1166	/* Delete the compare and branch at the end of the loop if completely
1167	unrolling the loop. Deleting the backward branch at the end also
1168	deletes the code label at the start of the loop. This is done at
1169	the very end to avoid problems with back_branch_in_range_p. */
1170
1171	if (unroll_type == UNROLL_COMPLETELY)
1172	safety_label = emit_label_after (gen_label_rtx (), last_loop_insn);
1173	else
1174	safety_label = emit_label_after (gen_label_rtx (), copy_end);
1175
1176	/* Delete all of the original loop instructions. Don't delete the
1177	LOOP_BEG note, or the first code label in the loop. */
1178
1179	insn = NEXT_INSN (copy_start);
1180	while (insn != safety_label)
1181	{
1182	if (insn != start_label)
1183	insn = delete_insn (insn);
1184	else
1185	insn = NEXT_INSN (insn);
1186	}
1187
1188	/* Can now delete the 'safety' label emitted to protect us from runaway
1189	delete_insn calls. */
1190	if (INSN_DELETED_P (safety_label))
1191	abort ();
1192	delete_insn (safety_label);
1193
1194	/* If exit_label exists, emit it after the loop. Doing the emit here
1195	forces it to have a higher INSN_UID than any insn in the unrolled loop.
1196	This is needed so that mostly_true_jump in reorg.c will treat jumps
1197	to this loop end label correctly, i.e. predict that they are usually
1198	not taken. */
1199	if (exit_label)
1200	emit_label_after (exit_label, loop_end);
1201	}
1202
1203	/* Return true if the loop can be safely, and profitably, preconditioned
1204	so that the unrolled copies of the loop body don't need exit tests.
1205
1206	This only works if final_value, initial_value and increment can be
1207	determined, and if increment is a constant power of 2.
1208	If increment is not a power of 2, then the preconditioning modulo
1209	operation would require a real modulo instead of a boolean AND, and this
1210	is not considered `profitable'. */
1211
1212	/* ??? If the loop is known to be executed very many times, or the machine
1213	has a very cheap divide instruction, then preconditioning is a win even
1214	when the increment is not a power of 2. Use RTX_COST to compute
1215	whether divide is cheap. */
1216
1217	static int
1218	precondition_loop_p (initial_value, final_value, increment, loop_start,
1219	loop_end)
1220	rtx initial_value, final_value, *increment;
1221	rtx loop_start, loop_end;
1222	{
1223
1224	if (loop_n_iterations > 0)
1225	{
1226	*initial_value = const0_rtx;
1227	*increment = const1_rtx;
1228	*final_value = GEN_INT (loop_n_iterations);
1229
1230	if (loop_dump_stream)
1231	fprintf (loop_dump_stream,
1232	"Preconditioning: Success, number of iterations known, %d.\n",
1233	loop_n_iterations);
1234	return 1;
1235	}
1236
1237	if (loop_initial_value == 0)
1238	{
1239	if (loop_dump_stream)
1240	fprintf (loop_dump_stream,
1241	"Preconditioning: Could not find initial value.\n");
1242	return 0;
1243	}
1244	else if (loop_increment == 0)
1245	{
1246	if (loop_dump_stream)
1247	fprintf (loop_dump_stream,
1248	"Preconditioning: Could not find increment value.\n");
1249	return 0;
1250	}
1251	else if (GET_CODE (loop_increment) != CONST_INT)
1252	{
1253	if (loop_dump_stream)
1254	fprintf (loop_dump_stream,
1255	"Preconditioning: Increment not a constant.\n");
1256	return 0;
1257	}
1258	else if ((exact_log2 (INTVAL (loop_increment)) < 0)
1259	&& (exact_log2 (- INTVAL (loop_increment)) < 0))
1260	{
1261	if (loop_dump_stream)
1262	fprintf (loop_dump_stream,
1263	"Preconditioning: Increment not a constant power of 2.\n");
1264	return 0;
1265	}
1266
1267	/* Unsigned_compare and compare_dir can be ignored here, since they do
1268	not matter for preconditioning. */
1269
1270	if (loop_final_value == 0)
1271	{
1272	if (loop_dump_stream)
1273	fprintf (loop_dump_stream,
1274	"Preconditioning: EQ comparison loop.\n");
1275	return 0;
1276	}
1277
1278	/* Must ensure that final_value is invariant, so call invariant_p to
1279	check. Before doing so, must check regno against max_reg_before_loop
1280	to make sure that the register is in the range covered by invariant_p.
1281	If it isn't, then it is most likely a biv/giv which by definition are
1282	not invariant. */
1283	if ((GET_CODE (loop_final_value) == REG
1284	&& REGNO (loop_final_value) >= max_reg_before_loop)
1285	\|\| (GET_CODE (loop_final_value) == PLUS
1286	&& REGNO (XEXP (loop_final_value, 0)) >= max_reg_before_loop)
1287	\|\| ! invariant_p (loop_final_value))
1288	{
1289	if (loop_dump_stream)
1290	fprintf (loop_dump_stream,
1291	"Preconditioning: Final value not invariant.\n");
1292	return 0;
1293	}
1294
1295	/* Fail for floating point values, since the caller of this function
1296	does not have code to deal with them. */
1297	if (GET_MODE_CLASS (GET_MODE (loop_final_value)) == MODE_FLOAT
1298	\|\| GET_MODE_CLASS (GET_MODE (loop_initial_value)) == MODE_FLOAT)
1299	{
1300	if (loop_dump_stream)
1301	fprintf (loop_dump_stream,
1302	"Preconditioning: Floating point final or initial value.\n");
1303	return 0;
1304	}
1305
1306	/* Now set initial_value to be the iteration_var, since that may be a
1307	simpler expression, and is guaranteed to be correct if all of the
1308	above tests succeed.
1309
1310	We can not use the initial_value as calculated, because it will be
1311	one too small for loops of the form "while (i-- > 0)". We can not
1312	emit code before the loop_skip_over insns to fix this problem as this
1313	will then give a number one too large for loops of the form
1314	"while (--i > 0)".
1315
1316	Note that all loops that reach here are entered at the top, because
1317	this function is not called if the loop starts with a jump. */
1318
1319	/* Fail if loop_iteration_var is not live before loop_start, since we need
1320	to test its value in the preconditioning code. */
1321
1322	if (uid_luid[regno_first_uid[REGNO (loop_iteration_var)]]
1323	> INSN_LUID (loop_start))
1324	{
1325	if (loop_dump_stream)
1326	fprintf (loop_dump_stream,
1327	"Preconditioning: Iteration var not live before loop start.\n");
1328	return 0;
1329	}
1330
1331	*initial_value = loop_iteration_var;
1332	*increment = loop_increment;
1333	*final_value = loop_final_value;
1334
1335	/* Success! */
1336	if (loop_dump_stream)
1337	fprintf (loop_dump_stream, "Preconditioning: Successful.\n");
1338	return 1;
1339	}
1340
1341
1342	/* All pseudo-registers must be mapped to themselves. Two hard registers
1343	must be mapped, VIRTUAL_STACK_VARS_REGNUM and VIRTUAL_INCOMING_ARGS_
1344	REGNUM, to avoid function-inlining specific conversions of these
1345	registers. All other hard regs can not be mapped because they may be
1346	used with different
1347	modes. */
1348
1349	static void
1350	init_reg_map (map, maxregnum)
1351	struct inline_remap *map;
1352	int maxregnum;
1353	{
1354	int i;
1355
1356	for (i = maxregnum - 1; i > LAST_VIRTUAL_REGISTER; i--)
1357	map->reg_map[i] = regno_reg_rtx[i];
1358	/* Just clear the rest of the entries. */
1359	for (i = LAST_VIRTUAL_REGISTER; i >= 0; i--)
1360	map->reg_map[i] = 0;
1361
1362	map->reg_map[VIRTUAL_STACK_VARS_REGNUM]
1363	= regno_reg_rtx[VIRTUAL_STACK_VARS_REGNUM];
1364	map->reg_map[VIRTUAL_INCOMING_ARGS_REGNUM]
1365	= regno_reg_rtx[VIRTUAL_INCOMING_ARGS_REGNUM];
1366	}
1367
1368	/* Strength-reduction will often emit code for optimized biv/givs which
1369	calculates their value in a temporary register, and then copies the result
1370	to the iv. This procedure reconstructs the pattern computing the iv;
1371	verifying that all operands are of the proper form.
1372
1373	The return value is the amount that the giv is incremented by. */
1374
1375	static rtx
1376	calculate_giv_inc (pattern, src_insn, regno)
1377	rtx pattern, src_insn;
1378	int regno;
1379	{
1380	rtx increment;
1381	rtx increment_total = 0;
1382	int tries = 0;
1383
1384	retry:
1385	/* Verify that we have an increment insn here. First check for a plus
1386	as the set source. */
1387	if (GET_CODE (SET_SRC (pattern)) != PLUS)
1388	{
1389	/* SR sometimes computes the new giv value in a temp, then copies it
1390	to the new_reg. */
1391	src_insn = PREV_INSN (src_insn);
1392	pattern = PATTERN (src_insn);
1393	if (GET_CODE (SET_SRC (pattern)) != PLUS)
1394	abort ();
1395
1396	/* The last insn emitted is not needed, so delete it to avoid confusing
1397	the second cse pass. This insn sets the giv unnecessarily. */
1398	delete_insn (get_last_insn ());
1399	}
1400
1401	/* Verify that we have a constant as the second operand of the plus. */
1402	increment = XEXP (SET_SRC (pattern), 1);
1403	if (GET_CODE (increment) != CONST_INT)
1404	{
1405	/* SR sometimes puts the constant in a register, especially if it is
1406	too big to be an add immed operand. */
1407	src_insn = PREV_INSN (src_insn);
1408	increment = SET_SRC (PATTERN (src_insn));
1409
1410	/* SR may have used LO_SUM to compute the constant if it is too large
1411	for a load immed operand. In this case, the constant is in operand
1412	one of the LO_SUM rtx. */
1413	if (GET_CODE (increment) == LO_SUM)
1414	increment = XEXP (increment, 1);
1415	else if (GET_CODE (increment) == IOR
1416	\|\| GET_CODE (increment) == ASHIFT)
1417	{
1418	/* The rs6000 port loads some constants with IOR.
1419	The alpha port loads some constants with ASHIFT. */
1420	rtx second_part = XEXP (increment, 1);
1421	enum rtx_code code = GET_CODE (increment);
1422
1423	src_insn = PREV_INSN (src_insn);
1424	increment = SET_SRC (PATTERN (src_insn));
1425	/* Don't need the last insn anymore. */
1426	delete_insn (get_last_insn ());
1427
1428	if (GET_CODE (second_part) != CONST_INT
1429	\|\| GET_CODE (increment) != CONST_INT)
1430	abort ();
1431
1432	if (code == IOR)
1433	increment = GEN_INT (INTVAL (increment) \| INTVAL (second_part));
1434	else
1435	increment = GEN_INT (INTVAL (increment) << INTVAL (second_part));
1436	}
1437
1438	if (GET_CODE (increment) != CONST_INT)
1439	abort ();
1440
1441	/* The insn loading the constant into a register is no longer needed,
1442	so delete it. */
1443	delete_insn (get_last_insn ());
1444	}
1445
1446	if (increment_total)
1447	increment_total = GEN_INT (INTVAL (increment_total) + INTVAL (increment));
1448	else
1449	increment_total = increment;
1450
1451	/* Check that the source register is the same as the register we expected
1452	to see as the source. If not, something is seriously wrong. */
1453	if (GET_CODE (XEXP (SET_SRC (pattern), 0)) != REG
1454	\|\| REGNO (XEXP (SET_SRC (pattern), 0)) != regno)
1455	{
1456	/* Some machines (e.g. the romp), may emit two add instructions for
1457	certain constants, so lets try looking for another add immediately
1458	before this one if we have only seen one add insn so far. */
1459
1460	if (tries == 0)
1461	{
1462	tries++;
1463
1464	src_insn = PREV_INSN (src_insn);
1465	pattern = PATTERN (src_insn);
1466
1467	delete_insn (get_last_insn ());
1468
1469	goto retry;
1470	}
1471
1472	abort ();
1473	}
1474
1475	return increment_total;
1476	}
1477
1478	/* Copy REG_NOTES, except for insn references, because not all insn_map
1479	entries are valid yet. We do need to copy registers now though, because
1480	the reg_map entries can change during copying. */
1481
1482	static rtx
1483	initial_reg_note_copy (notes, map)
1484	rtx notes;
1485	struct inline_remap *map;
1486	{
1487	rtx copy;
1488
1489	if (notes == 0)
1490	return 0;
1491
1492	copy = rtx_alloc (GET_CODE (notes));
1493	PUT_MODE (copy, GET_MODE (notes));
1494
1495	if (GET_CODE (notes) == EXPR_LIST)
1496	XEXP (copy, 0) = copy_rtx_and_substitute (XEXP (notes, 0), map);
1497	else if (GET_CODE (notes) == INSN_LIST)
1498	/* Don't substitute for these yet. */
1499	XEXP (copy, 0) = XEXP (notes, 0);
1500	else
1501	abort ();
1502
1503	XEXP (copy, 1) = initial_reg_note_copy (XEXP (notes, 1), map);
1504
1505	return copy;
1506	}
1507
1508	/* Fixup insn references in copied REG_NOTES. */
1509
1510	static void
1511	final_reg_note_copy (notes, map)
1512	rtx notes;
1513	struct inline_remap *map;
1514	{
1515	rtx note;
1516
1517	for (note = notes; note; note = XEXP (note, 1))
1518	if (GET_CODE (note) == INSN_LIST)
1519	XEXP (note, 0) = map->insn_map[INSN_UID (XEXP (note, 0))];
1520	}
1521
1522	/* Copy each instruction in the loop, substituting from map as appropriate.
1523	This is very similar to a loop in expand_inline_function. */
1524
1525	static void
1526	copy_loop_body (copy_start, copy_end, map, exit_label, last_iteration,
1527	unroll_type, start_label, loop_end, insert_before,
1528	copy_notes_from)
1529	rtx copy_start, copy_end;
1530	struct inline_remap *map;
1531	rtx exit_label;
1532	int last_iteration;
1533	enum unroll_types unroll_type;
1534	rtx start_label, loop_end, insert_before, copy_notes_from;
1535	{
1536	rtx insn, pattern;
1537	rtx tem, copy;
1538	int dest_reg_was_split, i;
1539	rtx cc0_insn = 0;
1540	rtx final_label = 0;
1541	rtx giv_inc, giv_dest_reg, giv_src_reg;
1542
1543	/* If this isn't the last iteration, then map any references to the
1544	start_label to final_label. Final label will then be emitted immediately
1545	after the end of this loop body if it was ever used.
1546
1547	If this is the last iteration, then map references to the start_label
1548	to itself. */
1549	if (! last_iteration)
1550	{
1551	final_label = gen_label_rtx ();
1552	map->label_map[CODE_LABEL_NUMBER (start_label)] = final_label;
1553	}
1554	else
1555	map->label_map[CODE_LABEL_NUMBER (start_label)] = start_label;
1556
1557	start_sequence ();
1558
1559	insn = copy_start;
1560	do
1561	{
1562	insn = NEXT_INSN (insn);
1563
1564	map->orig_asm_operands_vector = 0;
1565
1566	switch (GET_CODE (insn))
1567	{
1568	case INSN:
1569	pattern = PATTERN (insn);
1570	copy = 0;
1571	giv_inc = 0;
1572
1573	/* Check to see if this is a giv that has been combined with
1574	some split address givs. (Combined in the sense that
1575	`combine_givs' in loop.c has put two givs in the same register.)
1576	In this case, we must search all givs based on the same biv to
1577	find the address givs. Then split the address givs.
1578	Do this before splitting the giv, since that may map the
1579	SET_DEST to a new register. */
1580
1581	if (GET_CODE (pattern) == SET
1582	&& GET_CODE (SET_DEST (pattern)) == REG
1583	&& addr_combined_regs[REGNO (SET_DEST (pattern))])
1584	{
1585	struct iv_class *bl;
1586	struct induction v, tv;
1587	int regno = REGNO (SET_DEST (pattern));
1588
1589	v = addr_combined_regs[REGNO (SET_DEST (pattern))];
1590	bl = reg_biv_class[REGNO (v->src_reg)];
1591
1592	/* Although the giv_inc amount is not needed here, we must call
1593	calculate_giv_inc here since it might try to delete the
1594	last insn emitted. If we wait until later to call it,
1595	we might accidentally delete insns generated immediately
1596	below by emit_unrolled_add. */
1597
1598	giv_inc = calculate_giv_inc (pattern, insn, regno);
1599
1600	/* Now find all address giv's that were combined with this
1601	giv 'v'. */
1602	for (tv = bl->giv; tv; tv = tv->next_iv)
1603	if (tv->giv_type == DEST_ADDR && tv->same == v)
1604	{
1605	int this_giv_inc = INTVAL (giv_inc);
1606
1607	/* Scale this_giv_inc if the multiplicative factors of
1608	the two givs are different. */
1609	if (tv->mult_val != v->mult_val)
1610	this_giv_inc = (this_giv_inc / INTVAL (v->mult_val)
1611	* INTVAL (tv->mult_val));
1612
1613	tv->dest_reg = plus_constant (tv->dest_reg, this_giv_inc);
1614	*tv->location = tv->dest_reg;
1615
1616	if (last_iteration && unroll_type != UNROLL_COMPLETELY)
1617	{
1618	/* Must emit an insn to increment the split address
1619	giv. Add in the const_adjust field in case there
1620	was a constant eliminated from the address. */
1621	rtx value, dest_reg;
1622
1623	/* tv->dest_reg will be either a bare register,
1624	or else a register plus a constant. */
1625	if (GET_CODE (tv->dest_reg) == REG)
1626	dest_reg = tv->dest_reg;
1627	else
1628	dest_reg = XEXP (tv->dest_reg, 0);
1629
1630	/* Check for shared address givs, and avoid
1631	incrementing the shared pseudo reg more than
1632	once. */
1633	if (! tv->same_insn)
1634	{
1635	/* tv->dest_reg may actually be a (PLUS (REG)
1636	(CONST)) here, so we must call plus_constant
1637	to add the const_adjust amount before calling
1638	emit_unrolled_add below. */
1639	value = plus_constant (tv->dest_reg,
1640	tv->const_adjust);
1641
1642	/* The constant could be too large for an add
1643	immediate, so can't directly emit an insn
1644	here. */
1645	emit_unrolled_add (dest_reg, XEXP (value, 0),
1646	XEXP (value, 1));
1647	}
1648
1649	/* Reset the giv to be just the register again, in case
1650	it is used after the set we have just emitted.
1651	We must subtract the const_adjust factor added in
1652	above. */
1653	tv->dest_reg = plus_constant (dest_reg,
1654	- tv->const_adjust);
1655	*tv->location = tv->dest_reg;
1656	}
1657	}
1658	}
1659
1660	/* If this is a setting of a splittable variable, then determine
1661	how to split the variable, create a new set based on this split,
1662	and set up the reg_map so that later uses of the variable will
1663	use the new split variable. */
1664
1665	dest_reg_was_split = 0;
1666
1667	if (GET_CODE (pattern) == SET
1668	&& GET_CODE (SET_DEST (pattern)) == REG
1669	&& splittable_regs[REGNO (SET_DEST (pattern))])
1670	{
1671	int regno = REGNO (SET_DEST (pattern));
1672
1673	dest_reg_was_split = 1;
1674
1675	/* Compute the increment value for the giv, if it wasn't
1676	already computed above. */
1677
1678	if (giv_inc == 0)
1679	giv_inc = calculate_giv_inc (pattern, insn, regno);
1680	giv_dest_reg = SET_DEST (pattern);
1681	giv_src_reg = SET_DEST (pattern);
1682
1683	if (unroll_type == UNROLL_COMPLETELY)
1684	{
1685	/* Completely unrolling the loop. Set the induction
1686	variable to a known constant value. */
1687
1688	/* The value in splittable_regs may be an invariant
1689	value, so we must use plus_constant here. */
1690	splittable_regs[regno]
1691	= plus_constant (splittable_regs[regno], INTVAL (giv_inc));
1692
1693	if (GET_CODE (splittable_regs[regno]) == PLUS)
1694	{
1695	giv_src_reg = XEXP (splittable_regs[regno], 0);
1696	giv_inc = XEXP (splittable_regs[regno], 1);
1697	}
1698	else
1699	{
1700	/* The splittable_regs value must be a REG or a
1701	CONST_INT, so put the entire value in the giv_src_reg
1702	variable. */
1703	giv_src_reg = splittable_regs[regno];
1704	giv_inc = const0_rtx;
1705	}
1706	}
1707	else
1708	{
1709	/* Partially unrolling loop. Create a new pseudo
1710	register for the iteration variable, and set it to
1711	be a constant plus the original register. Except
1712	on the last iteration, when the result has to
1713	go back into the original iteration var register. */
1714
1715	/* Handle bivs which must be mapped to a new register
1716	when split. This happens for bivs which need their
1717	final value set before loop entry. The new register
1718	for the biv was stored in the biv's first struct
1719	induction entry by find_splittable_regs. */
1720
1721	if (regno < max_reg_before_loop
1722	&& reg_iv_type[regno] == BASIC_INDUCT)
1723	{
1724	giv_src_reg = reg_biv_class[regno]->biv->src_reg;
1725	giv_dest_reg = giv_src_reg;
1726	}
1727
1728	#if 0
1729	/* If non-reduced/final-value givs were split, then
1730	this would have to remap those givs also. See
1731	find_splittable_regs. */
1732	#endif
1733
1734	splittable_regs[regno]
1735	= GEN_INT (INTVAL (giv_inc)
1736	+ INTVAL (splittable_regs[regno]));
1737	giv_inc = splittable_regs[regno];
1738
1739	/* Now split the induction variable by changing the dest
1740	of this insn to a new register, and setting its
1741	reg_map entry to point to this new register.
1742
1743	If this is the last iteration, and this is the last insn
1744	that will update the iv, then reuse the original dest,
1745	to ensure that the iv will have the proper value when
1746	the loop exits or repeats.
1747
1748	Using splittable_regs_updates here like this is safe,
1749	because it can only be greater than one if all
1750	instructions modifying the iv are always executed in
1751	order. */
1752
1753	if (! last_iteration
1754	\|\| (splittable_regs_updates[regno]-- != 1))
1755	{
1756	tem = gen_reg_rtx (GET_MODE (giv_src_reg));
1757	giv_dest_reg = tem;
1758	map->reg_map[regno] = tem;
1759	}
1760	else
1761	map->reg_map[regno] = giv_src_reg;
1762	}
1763
1764	/* The constant being added could be too large for an add
1765	immediate, so can't directly emit an insn here. */
1766	emit_unrolled_add (giv_dest_reg, giv_src_reg, giv_inc);
1767	copy = get_last_insn ();
1768	pattern = PATTERN (copy);
1769	}
1770	else
1771	{
1772	pattern = copy_rtx_and_substitute (pattern, map);
1773	copy = emit_insn (pattern);
1774	}
1775	REG_NOTES (copy) = initial_reg_note_copy (REG_NOTES (insn), map);
1776
1777	#ifdef HAVE_cc0
1778	/* If this insn is setting CC0, it may need to look at
1779	the insn that uses CC0 to see what type of insn it is.
1780	In that case, the call to recog via validate_change will
1781	fail. So don't substitute constants here. Instead,
1782	do it when we emit the following insn.
1783
1784	For example, see the pyr.md file. That machine has signed and
1785	unsigned compares. The compare patterns must check the
1786	following branch insn to see which what kind of compare to
1787	emit.
1788
1789	If the previous insn set CC0, substitute constants on it as
1790	well. */
1791	if (sets_cc0_p (PATTERN (copy)) != 0)
1792	cc0_insn = copy;
1793	else
1794	{
1795	if (cc0_insn)
1796	try_constants (cc0_insn, map);
1797	cc0_insn = 0;
1798	try_constants (copy, map);
1799	}
1800	#else
1801	try_constants (copy, map);
1802	#endif
1803
1804	/* Make split induction variable constants `permanent' since we
1805	know there are no backward branches across iteration variable
1806	settings which would invalidate this. */
1807	if (dest_reg_was_split)
1808	{
1809	int regno = REGNO (SET_DEST (pattern));
1810
1811	if (regno < map->const_equiv_map_size
1812	&& map->const_age_map[regno] == map->const_age)
1813	map->const_age_map[regno] = -1;
1814	}
1815	break;
1816
1817	case JUMP_INSN:
1818	pattern = copy_rtx_and_substitute (PATTERN (insn), map);
1819	copy = emit_jump_insn (pattern);
1820	REG_NOTES (copy) = initial_reg_note_copy (REG_NOTES (insn), map);
1821
1822	if (JUMP_LABEL (insn) == start_label && insn == copy_end
1823	&& ! last_iteration)
1824	{
1825	/* This is a branch to the beginning of the loop; this is the
1826	last insn being copied; and this is not the last iteration.
1827	In this case, we want to change the original fall through
1828	case to be a branch past the end of the loop, and the
1829	original jump label case to fall_through. */
1830
1831	if (invert_exp (pattern, copy))
1832	{
1833	if (! redirect_exp (&pattern,
1834	map->label_map[CODE_LABEL_NUMBER
1835	(JUMP_LABEL (insn))],
1836	exit_label, copy))
1837	abort ();
1838	}
1839	else
1840	{
1841	rtx jmp;
1842	rtx lab = gen_label_rtx ();
1843	/* Can't do it by reversing the jump (probably because we
1844	couldn't reverse the conditions), so emit a new
1845	jump_insn after COPY, and redirect the jump around
1846	that. */
1847	jmp = emit_jump_insn_after (gen_jump (exit_label), copy);
1848	jmp = emit_barrier_after (jmp);
1849	emit_label_after (lab, jmp);
1850	LABEL_NUSES (lab) = 0;
1851	if (! redirect_exp (&pattern,
1852	map->label_map[CODE_LABEL_NUMBER
1853	(JUMP_LABEL (insn))],
1854	lab, copy))
1855	abort ();
1856	}
1857	}
1858
1859	#ifdef HAVE_cc0
1860	if (cc0_insn)
1861	try_constants (cc0_insn, map);
1862	cc0_insn = 0;
1863	#endif
1864	try_constants (copy, map);
1865
1866	/* Set the jump label of COPY correctly to avoid problems with
1867	later passes of unroll_loop, if INSN had jump label set. */
1868	if (JUMP_LABEL (insn))
1869	{
1870	rtx label = 0;
1871
1872	/* Can't use the label_map for every insn, since this may be
1873	the backward branch, and hence the label was not mapped. */
1874	if (GET_CODE (pattern) == SET)
1875	{
1876	tem = SET_SRC (pattern);
1877	if (GET_CODE (tem) == LABEL_REF)
1878	label = XEXP (tem, 0);
1879	else if (GET_CODE (tem) == IF_THEN_ELSE)
1880	{
1881	if (XEXP (tem, 1) != pc_rtx)
1882	label = XEXP (XEXP (tem, 1), 0);
1883	else
1884	label = XEXP (XEXP (tem, 2), 0);
1885	}
1886	}
1887
1888	if (label && GET_CODE (label) == CODE_LABEL)
1889	JUMP_LABEL (copy) = label;
1890	else
1891	{
1892	/* An unrecognizable jump insn, probably the entry jump
1893	for a switch statement. This label must have been mapped,
1894	so just use the label_map to get the new jump label. */
1895	JUMP_LABEL (copy)
1896	= map->label_map[CODE_LABEL_NUMBER (JUMP_LABEL (insn))];
1897	}
1898
1899	/* If this is a non-local jump, then must increase the label
1900	use count so that the label will not be deleted when the
1901	original jump is deleted. */
1902	LABEL_NUSES (JUMP_LABEL (copy))++;
1903	}
1904	else if (GET_CODE (PATTERN (copy)) == ADDR_VEC
1905	\|\| GET_CODE (PATTERN (copy)) == ADDR_DIFF_VEC)
1906	{
1907	rtx pat = PATTERN (copy);
1908	int diff_vec_p = GET_CODE (pat) == ADDR_DIFF_VEC;
1909	int len = XVECLEN (pat, diff_vec_p);
1910	int i;
1911
1912	for (i = 0; i < len; i++)
1913	LABEL_NUSES (XEXP (XVECEXP (pat, diff_vec_p, i), 0))++;
1914	}
1915
1916	/* If this used to be a conditional jump insn but whose branch
1917	direction is now known, we must do something special. */
1918	if (condjump_p (insn) && !simplejump_p (insn) && map->last_pc_value)
1919	{
1920	#ifdef HAVE_cc0
1921	/* The previous insn set cc0 for us. So delete it. */
1922	delete_insn (PREV_INSN (copy));
1923	#endif
1924
1925	/* If this is now a no-op, delete it. */
1926	if (map->last_pc_value == pc_rtx)
1927	{
1928	/* Don't let delete_insn delete the label referenced here,
1929	because we might possibly need it later for some other
1930	instruction in the loop. */
1931	if (JUMP_LABEL (copy))
1932	LABEL_NUSES (JUMP_LABEL (copy))++;
1933	delete_insn (copy);
1934	if (JUMP_LABEL (copy))
1935	LABEL_NUSES (JUMP_LABEL (copy))--;
1936	copy = 0;
1937	}
1938	else
1939	/* Otherwise, this is unconditional jump so we must put a
1940	BARRIER after it. We could do some dead code elimination
1941	here, but jump.c will do it just as well. */
1942	emit_barrier ();
1943	}
1944	break;
1945
1946	case CALL_INSN:
1947	pattern = copy_rtx_and_substitute (PATTERN (insn), map);
1948	copy = emit_call_insn (pattern);
1949	REG_NOTES (copy) = initial_reg_note_copy (REG_NOTES (insn), map);
1950
1951	/* Because the USAGE information potentially contains objects other
1952	than hard registers, we need to copy it. */
1953	CALL_INSN_FUNCTION_USAGE (copy) =
1954	copy_rtx_and_substitute (CALL_INSN_FUNCTION_USAGE (insn), map);
1955
1956	#ifdef HAVE_cc0
1957	if (cc0_insn)
1958	try_constants (cc0_insn, map);
1959	cc0_insn = 0;
1960	#endif
1961	try_constants (copy, map);
1962
1963	/* Be lazy and assume CALL_INSNs clobber all hard registers. */
1964	for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
1965	map->const_equiv_map[i] = 0;
1966	break;
1967
1968	case CODE_LABEL:
1969	/* If this is the loop start label, then we don't need to emit a
1970	copy of this label since no one will use it. */
1971
1972	if (insn != start_label)
1973	{
1974	copy = emit_label (map->label_map[CODE_LABEL_NUMBER (insn)]);
1975	map->const_age++;
1976	}
1977	break;
1978
1979	case BARRIER:
1980	copy = emit_barrier ();
1981	break;
1982
1983	case NOTE:
1984	/* VTOP notes are valid only before the loop exit test. If placed
1985	anywhere else, loop may generate bad code. */
1986
1987	if (NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED
1988	&& (NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_VTOP
1989	\|\| (last_iteration && unroll_type != UNROLL_COMPLETELY)))
1990	copy = emit_note (NOTE_SOURCE_FILE (insn),
1991	NOTE_LINE_NUMBER (insn));
1992	else
1993	copy = 0;
1994	break;
1995
1996	default:
1997	abort ();
1998	break;
1999	}
2000
2001	map->insn_map[INSN_UID (insn)] = copy;
2002	}
2003	while (insn != copy_end);
2004
2005	/* Now finish coping the REG_NOTES. */
2006	insn = copy_start;
2007	do
2008	{
2009	insn = NEXT_INSN (insn);
2010	if ((GET_CODE (insn) == INSN \|\| GET_CODE (insn) == JUMP_INSN
2011	\|\| GET_CODE (insn) == CALL_INSN)
2012	&& map->insn_map[INSN_UID (insn)])
2013	final_reg_note_copy (REG_NOTES (map->insn_map[INSN_UID (insn)]), map);
2014	}
2015	while (insn != copy_end);
2016
2017	/* There may be notes between copy_notes_from and loop_end. Emit a copy of
2018	each of these notes here, since there may be some important ones, such as
2019	NOTE_INSN_BLOCK_END notes, in this group. We don't do this on the last
2020	iteration, because the original notes won't be deleted.
2021
2022	We can't use insert_before here, because when from preconditioning,
2023	insert_before points before the loop. We can't use copy_end, because
2024	there may be insns already inserted after it (which we don't want to
2025	copy) when not from preconditioning code. */
2026
2027	if (! last_iteration)
2028	{
2029	for (insn = copy_notes_from; insn != loop_end; insn = NEXT_INSN (insn))
2030	{
2031	if (GET_CODE (insn) == NOTE
2032	&& NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED)
2033	emit_note (NOTE_SOURCE_FILE (insn), NOTE_LINE_NUMBER (insn));
2034	}
2035	}
2036
2037	if (final_label && LABEL_NUSES (final_label) > 0)
2038	emit_label (final_label);
2039
2040	tem = gen_sequence ();
2041	end_sequence ();
2042	emit_insn_before (tem, insert_before);
2043	}
2044
2045	/* Emit an insn, using the expand_binop to ensure that a valid insn is
2046	emitted. This will correctly handle the case where the increment value
2047	won't fit in the immediate field of a PLUS insns. */
2048
2049	void
2050	emit_unrolled_add (dest_reg, src_reg, increment)
2051	rtx dest_reg, src_reg, increment;
2052	{
2053	rtx result;
2054
2055	result = expand_binop (GET_MODE (dest_reg), add_optab, src_reg, increment,
2056	dest_reg, 0, OPTAB_LIB_WIDEN);
2057
2058	if (dest_reg != result)
2059	emit_move_insn (dest_reg, result);
2060	}
2061
2062	/* Searches the insns between INSN and LOOP_END. Returns 1 if there
2063	is a backward branch in that range that branches to somewhere between
2064	LOOP_START and INSN. Returns 0 otherwise. */
2065
2066	/* ??? This is quadratic algorithm. Could be rewritten to be linear.
2067	In practice, this is not a problem, because this function is seldom called,
2068	and uses a negligible amount of CPU time on average. */
2069
2070	int
2071	back_branch_in_range_p (insn, loop_start, loop_end)
2072	rtx insn;
2073	rtx loop_start, loop_end;
2074	{
2075	rtx p, q, target_insn;
2076
2077	/* Stop before we get to the backward branch at the end of the loop. */
2078	loop_end = prev_nonnote_insn (loop_end);
2079	if (GET_CODE (loop_end) == BARRIER)
2080	loop_end = PREV_INSN (loop_end);
2081
2082	/* Check in case insn has been deleted, search forward for first non
2083	deleted insn following it. */
2084	while (INSN_DELETED_P (insn))
2085	insn = NEXT_INSN (insn);
2086
2087	/* Check for the case where insn is the last insn in the loop. */
2088	if (insn == loop_end)
2089	return 0;
2090
2091	for (p = NEXT_INSN (insn); p != loop_end; p = NEXT_INSN (p))
2092	{
2093	if (GET_CODE (p) == JUMP_INSN)
2094	{
2095	target_insn = JUMP_LABEL (p);
2096
2097	/* Search from loop_start to insn, to see if one of them is
2098	the target_insn. We can't use INSN_LUID comparisons here,
2099	since insn may not have an LUID entry. */
2100	for (q = loop_start; q != insn; q = NEXT_INSN (q))
2101	if (q == target_insn)
2102	return 1;
2103	}
2104	}
2105
2106	return 0;
2107	}
2108
2109	/* Try to generate the simplest rtx for the expression
2110	(PLUS (MULT mult1 mult2) add1). This is used to calculate the initial
2111	value of giv's. */
2112
2113	static rtx
2114	fold_rtx_mult_add (mult1, mult2, add1, mode)
2115	rtx mult1, mult2, add1;
2116	enum machine_mode mode;
2117	{
2118	rtx temp, mult_res;
2119	rtx result;
2120
2121	/* The modes must all be the same. This should always be true. For now,
2122	check to make sure. */
2123	if ((GET_MODE (mult1) != mode && GET_MODE (mult1) != VOIDmode)
2124	\|\| (GET_MODE (mult2) != mode && GET_MODE (mult2) != VOIDmode)
2125	\|\| (GET_MODE (add1) != mode && GET_MODE (add1) != VOIDmode))
2126	abort ();
2127
2128	/* Ensure that if at least one of mult1/mult2 are constant, then mult2
2129	will be a constant. */
2130	if (GET_CODE (mult1) == CONST_INT)
2131	{
2132	temp = mult2;
2133	mult2 = mult1;
2134	mult1 = temp;
2135	}
2136
2137	mult_res = simplify_binary_operation (MULT, mode, mult1, mult2);
2138	if (! mult_res)
2139	mult_res = gen_rtx (MULT, mode, mult1, mult2);
2140
2141	/* Again, put the constant second. */
2142	if (GET_CODE (add1) == CONST_INT)
2143	{
2144	temp = add1;
2145	add1 = mult_res;
2146	mult_res = temp;
2147	}
2148
2149	result = simplify_binary_operation (PLUS, mode, add1, mult_res);
2150	if (! result)
2151	result = gen_rtx (PLUS, mode, add1, mult_res);
2152
2153	return result;
2154	}
2155
2156	/* Searches the list of induction struct's for the biv BL, to try to calculate
2157	the total increment value for one iteration of the loop as a constant.
2158
2159	Returns the increment value as an rtx, simplified as much as possible,
2160	if it can be calculated. Otherwise, returns 0. */
2161
2162	rtx
2163	biv_total_increment (bl, loop_start, loop_end)
2164	struct iv_class *bl;
2165	rtx loop_start, loop_end;
2166	{
2167	struct induction *v;
2168	rtx result;
2169
2170	/* For increment, must check every instruction that sets it. Each
2171	instruction must be executed only once each time through the loop.
2172	To verify this, we check that the the insn is always executed, and that
2173	there are no backward branches after the insn that branch to before it.
2174	Also, the insn must have a mult_val of one (to make sure it really is
2175	an increment). */
2176
2177	result = const0_rtx;
2178	for (v = bl->biv; v; v = v->next_iv)
2179	{
2180	if (v->always_computable && v->mult_val == const1_rtx
2181	&& ! back_branch_in_range_p (v->insn, loop_start, loop_end))
2182	result = fold_rtx_mult_add (result, const1_rtx, v->add_val, v->mode);
2183	else
2184	return 0;
2185	}
2186
2187	return result;
2188	}
2189
2190	/* Determine the initial value of the iteration variable, and the amount
2191	that it is incremented each loop. Use the tables constructed by
2192	the strength reduction pass to calculate these values.
2193
2194	Initial_value and/or increment are set to zero if their values could not
2195	be calculated. */
2196
2197	static void
2198	iteration_info (iteration_var, initial_value, increment, loop_start, loop_end)
2199	rtx iteration_var, initial_value, increment;
2200	rtx loop_start, loop_end;
2201	{
2202	struct iv_class *bl;
2203	struct induction v, b;
2204
2205	/* Clear the result values, in case no answer can be found. */
2206	*initial_value = 0;
2207	*increment = 0;
2208
2209	/* The iteration variable can be either a giv or a biv. Check to see
2210	which it is, and compute the variable's initial value, and increment
2211	value if possible. */
2212
2213	/* If this is a new register, can't handle it since we don't have any
2214	reg_iv_type entry for it. */
2215	if (REGNO (iteration_var) >= max_reg_before_loop)
2216	{
2217	if (loop_dump_stream)
2218	fprintf (loop_dump_stream,
2219	"Loop unrolling: No reg_iv_type entry for iteration var.\n");
2220	return;
2221	}
2222	/* Reject iteration variables larger than the host long size, since they
2223	could result in a number of iterations greater than the range of our
2224	`unsigned long' variable loop_n_iterations. */
2225	else if (GET_MODE_BITSIZE (GET_MODE (iteration_var)) > HOST_BITS_PER_LONG)
2226	{
2227	if (loop_dump_stream)
2228	fprintf (loop_dump_stream,
2229	"Loop unrolling: Iteration var rejected because mode larger than host long.\n");
2230	return;
2231	}
2232	else if (GET_MODE_CLASS (GET_MODE (iteration_var)) != MODE_INT)
2233	{
2234	if (loop_dump_stream)
2235	fprintf (loop_dump_stream,
2236	"Loop unrolling: Iteration var not an integer.\n");
2237	return;
2238	}
2239	else if (reg_iv_type[REGNO (iteration_var)] == BASIC_INDUCT)
2240	{
2241	/* Grab initial value, only useful if it is a constant. */
2242	bl = reg_biv_class[REGNO (iteration_var)];
2243	*initial_value = bl->initial_value;
2244
2245	*increment = biv_total_increment (bl, loop_start, loop_end);
2246	}
2247	else if (reg_iv_type[REGNO (iteration_var)] == GENERAL_INDUCT)
2248	{
2249	#if 1
2250	/* ??? The code below does not work because the incorrect number of
2251	iterations is calculated when the biv is incremented after the giv
2252	is set (which is the usual case). This can probably be accounted
2253	for by biasing the initial_value by subtracting the amount of the
2254	increment that occurs between the giv set and the giv test. However,
2255	a giv as an iterator is very rare, so it does not seem worthwhile
2256	to handle this. */
2257	/* ??? An example failure is: i = 6; do {;} while (i++ < 9). */
2258	if (loop_dump_stream)
2259	fprintf (loop_dump_stream,
2260	"Loop unrolling: Giv iterators are not handled.\n");
2261	return;
2262	#else
2263	/* Initial value is mult_val times the biv's initial value plus
2264	add_val. Only useful if it is a constant. */
2265	v = reg_iv_info[REGNO (iteration_var)];
2266	bl = reg_biv_class[REGNO (v->src_reg)];
2267	*initial_value = fold_rtx_mult_add (v->mult_val, bl->initial_value,
2268	v->add_val, v->mode);
2269
2270	/* Increment value is mult_val times the increment value of the biv. */
2271
2272	*increment = biv_total_increment (bl, loop_start, loop_end);
2273	if (*increment)
2274	increment = fold_rtx_mult_add (v->mult_val, increment, const0_rtx,
2275	v->mode);
2276	#endif
2277	}
2278	else
2279	{
2280	if (loop_dump_stream)
2281	fprintf (loop_dump_stream,
2282	"Loop unrolling: Not basic or general induction var.\n");
2283	return;
2284	}
2285	}
2286
2287	/* Calculate the approximate final value of the iteration variable
2288	which has an loop exit test with code COMPARISON_CODE and comparison value
2289	of COMPARISON_VALUE. Also returns an indication of whether the comparison
2290	was signed or unsigned, and the direction of the comparison. This info is
2291	needed to calculate the number of loop iterations. */
2292
2293	static rtx
2294	approx_final_value (comparison_code, comparison_value, unsigned_p, compare_dir)
2295	enum rtx_code comparison_code;
2296	rtx comparison_value;
2297	int *unsigned_p;
2298	int *compare_dir;
2299	{
2300	/* Calculate the final value of the induction variable.
2301	The exact final value depends on the branch operator, and increment sign.
2302	This is only an approximate value. It will be wrong if the iteration
2303	variable is not incremented by one each time through the loop, and
2304	approx final value - start value % increment != 0. */
2305
2306	*unsigned_p = 0;
2307	switch (comparison_code)
2308	{
2309	case LEU:
2310	*unsigned_p = 1;
2311	case LE:
2312	*compare_dir = 1;
2313	return plus_constant (comparison_value, 1);
2314	case GEU:
2315	*unsigned_p = 1;
2316	case GE:
2317	*compare_dir = -1;
2318	return plus_constant (comparison_value, -1);
2319	case EQ:
2320	/* Can not calculate a final value for this case. */
2321	*compare_dir = 0;
2322	return 0;
2323	case LTU:
2324	*unsigned_p = 1;
2325	case LT:
2326	*compare_dir = 1;
2327	return comparison_value;
2328	break;
2329	case GTU:
2330	*unsigned_p = 1;
2331	case GT:
2332	*compare_dir = -1;
2333	return comparison_value;
2334	case NE:
2335	*compare_dir = 0;
2336	return comparison_value;
2337	default:
2338	abort ();
2339	}
2340	}
2341
2342	/* For each biv and giv, determine whether it can be safely split into
2343	a different variable for each unrolled copy of the loop body. If it
2344	is safe to split, then indicate that by saving some useful info
2345	in the splittable_regs array.
2346
2347	If the loop is being completely unrolled, then splittable_regs will hold
2348	the current value of the induction variable while the loop is unrolled.
2349	It must be set to the initial value of the induction variable here.
2350	Otherwise, splittable_regs will hold the difference between the current
2351	value of the induction variable and the value the induction variable had
2352	at the top of the loop. It must be set to the value 0 here.
2353
2354	Returns the total number of instructions that set registers that are
2355	splittable. */
2356
2357	/* ?? If the loop is only unrolled twice, then most of the restrictions to
2358	constant values are unnecessary, since we can easily calculate increment
2359	values in this case even if nothing is constant. The increment value
2360	should not involve a multiply however. */
2361
2362	/* ?? Even if the biv/giv increment values aren't constant, it may still
2363	be beneficial to split the variable if the loop is only unrolled a few
2364	times, since multiplies by small integers (1,2,3,4) are very cheap. */
2365
2366	static int
2367	find_splittable_regs (unroll_type, loop_start, loop_end, end_insert_before,
2368	unroll_number)
2369	enum unroll_types unroll_type;
2370	rtx loop_start, loop_end;
2371	rtx end_insert_before;
2372	int unroll_number;
2373	{
2374	struct iv_class *bl;
2375	struct induction *v;
2376	rtx increment, tem;
2377	rtx biv_final_value;
2378	int biv_splittable;
2379	int result = 0;
2380
2381	for (bl = loop_iv_list; bl; bl = bl->next)
2382	{
2383	/* Biv_total_increment must return a constant value,
2384	otherwise we can not calculate the split values. */
2385
2386	increment = biv_total_increment (bl, loop_start, loop_end);
2387	if (! increment \|\| GET_CODE (increment) != CONST_INT)
2388	continue;
2389
2390	/* The loop must be unrolled completely, or else have a known number
2391	of iterations and only one exit, or else the biv must be dead
2392	outside the loop, or else the final value must be known. Otherwise,
2393	it is unsafe to split the biv since it may not have the proper
2394	value on loop exit. */
2395
2396	/* loop_number_exit_count is non-zero if the loop has an exit other than
2397	a fall through at the end. */
2398
2399	biv_splittable = 1;
2400	biv_final_value = 0;
2401	if (unroll_type != UNROLL_COMPLETELY
2402	&& (loop_number_exit_count[uid_loop_num[INSN_UID (loop_start)]]
2403	\|\| unroll_type == UNROLL_NAIVE)
2404	&& (uid_luid[regno_last_uid[bl->regno]] >= INSN_LUID (loop_end)
2405	\|\| ! bl->init_insn
2406	\|\| INSN_UID (bl->init_insn) >= max_uid_for_loop
2407	\|\| (uid_luid[regno_first_uid[bl->regno]]
2408	< INSN_LUID (bl->init_insn))
2409	\|\| reg_mentioned_p (bl->biv->dest_reg, SET_SRC (bl->init_set)))
2410	&& ! (biv_final_value = final_biv_value (bl, loop_start, loop_end)))
2411	biv_splittable = 0;
2412
2413	/* If any of the insns setting the BIV don't do so with a simple
2414	PLUS, we don't know how to split it. */
2415	for (v = bl->biv; biv_splittable && v; v = v->next_iv)
2416	if ((tem = single_set (v->insn)) == 0
2417	\|\| GET_CODE (SET_DEST (tem)) != REG
2418	\|\| REGNO (SET_DEST (tem)) != bl->regno
2419	\|\| GET_CODE (SET_SRC (tem)) != PLUS)
2420	biv_splittable = 0;
2421
2422	/* If final value is non-zero, then must emit an instruction which sets
2423	the value of the biv to the proper value. This is done after
2424	handling all of the givs, since some of them may need to use the
2425	biv's value in their initialization code. */
2426
2427	/* This biv is splittable. If completely unrolling the loop, save
2428	the biv's initial value. Otherwise, save the constant zero. */
2429
2430	if (biv_splittable == 1)
2431	{
2432	if (unroll_type == UNROLL_COMPLETELY)
2433	{
2434	/* If the initial value of the biv is itself (i.e. it is too
2435	complicated for strength_reduce to compute), or is a hard
2436	register, or it isn't invariant, then we must create a new
2437	pseudo reg to hold the initial value of the biv. */
2438
2439	if (GET_CODE (bl->initial_value) == REG
2440	&& (REGNO (bl->initial_value) == bl->regno
2441	\|\| REGNO (bl->initial_value) < FIRST_PSEUDO_REGISTER
2442	\|\| ! invariant_p (bl->initial_value)))
2443	{
2444	rtx tem = gen_reg_rtx (bl->biv->mode);
2445
2446	emit_insn_before (gen_move_insn (tem, bl->biv->src_reg),
2447	loop_start);
2448
2449	if (loop_dump_stream)
2450	fprintf (loop_dump_stream, "Biv %d initial value remapped to %d.\n",
2451	bl->regno, REGNO (tem));
2452
2453	splittable_regs[bl->regno] = tem;
2454	}
2455	else
2456	splittable_regs[bl->regno] = bl->initial_value;
2457	}
2458	else
2459	splittable_regs[bl->regno] = const0_rtx;
2460
2461	/* Save the number of instructions that modify the biv, so that
2462	we can treat the last one specially. */
2463
2464	splittable_regs_updates[bl->regno] = bl->biv_count;
2465	result += bl->biv_count;
2466
2467	if (loop_dump_stream)
2468	fprintf (loop_dump_stream,
2469	"Biv %d safe to split.\n", bl->regno);
2470	}
2471
2472	/* Check every giv that depends on this biv to see whether it is
2473	splittable also. Even if the biv isn't splittable, givs which
2474	depend on it may be splittable if the biv is live outside the
2475	loop, and the givs aren't. */
2476
2477	result += find_splittable_givs (bl, unroll_type, loop_start, loop_end,
2478	increment, unroll_number);
2479
2480	/* If final value is non-zero, then must emit an instruction which sets
2481	the value of the biv to the proper value. This is done after
2482	handling all of the givs, since some of them may need to use the
2483	biv's value in their initialization code. */
2484	if (biv_final_value)
2485	{
2486	/* If the loop has multiple exits, emit the insns before the
2487	loop to ensure that it will always be executed no matter
2488	how the loop exits. Otherwise emit the insn after the loop,
2489	since this is slightly more efficient. */
2490	if (! loop_number_exit_count[uid_loop_num[INSN_UID (loop_start)]])
2491	emit_insn_before (gen_move_insn (bl->biv->src_reg,
2492	biv_final_value),
2493	end_insert_before);
2494	else
2495	{
2496	/* Create a new register to hold the value of the biv, and then
2497	set the biv to its final value before the loop start. The biv
2498	is set to its final value before loop start to ensure that
2499	this insn will always be executed, no matter how the loop
2500	exits. */
2501	rtx tem = gen_reg_rtx (bl->biv->mode);
2502	emit_insn_before (gen_move_insn (tem, bl->biv->src_reg),
2503	loop_start);
2504	emit_insn_before (gen_move_insn (bl->biv->src_reg,
2505	biv_final_value),
2506	loop_start);
2507
2508	if (loop_dump_stream)
2509	fprintf (loop_dump_stream, "Biv %d mapped to %d for split.\n",
2510	REGNO (bl->biv->src_reg), REGNO (tem));
2511
2512	/* Set up the mapping from the original biv register to the new
2513	register. */
2514	bl->biv->src_reg = tem;
2515	}
2516	}
2517	}
2518	return result;
2519	}
2520
2521	/* Return 1 if the first and last unrolled copy of the address giv V is valid
2522	for the instruction that is using it. Do not make any changes to that
2523	instruction. */
2524
2525	static int
2526	verify_addresses (v, giv_inc, unroll_number)
2527	struct induction *v;
2528	rtx giv_inc;
2529	int unroll_number;
2530	{
2531	int ret = 1;
2532	rtx orig_addr = *v->location;
2533	rtx last_addr = plus_constant (v->dest_reg,
2534	INTVAL (giv_inc) * (unroll_number - 1));
2535
2536	/* First check to see if either address would fail. */
2537	if (! validate_change (v->insn, v->location, v->dest_reg, 0)
2538	\|\| ! validate_change (v->insn, v->location, last_addr, 0))
2539	ret = 0;
2540
2541	/* Now put things back the way they were before. This will always
2542	succeed. */
2543	validate_change (v->insn, v->location, orig_addr, 0);
2544
2545	return ret;
2546	}
2547
2548	/* For every giv based on the biv BL, check to determine whether it is
2549	splittable. This is a subroutine to find_splittable_regs ().
2550
2551	Return the number of instructions that set splittable registers. */
2552
2553	static int
2554	find_splittable_givs (bl, unroll_type, loop_start, loop_end, increment,
2555	unroll_number)
2556	struct iv_class *bl;
2557	enum unroll_types unroll_type;
2558	rtx loop_start, loop_end;
2559	rtx increment;
2560	int unroll_number;
2561	{
2562	struct induction v, v2;
2563	rtx final_value;
2564	rtx tem;
2565	int result = 0;
2566
2567	/* Scan the list of givs, and set the same_insn field when there are
2568	multiple identical givs in the same insn. */
2569	for (v = bl->giv; v; v = v->next_iv)
2570	for (v2 = v->next_iv; v2; v2 = v2->next_iv)
2571	if (v->insn == v2->insn && rtx_equal_p (v->new_reg, v2->new_reg)
2572	&& ! v2->same_insn)
2573	v2->same_insn = v;
2574
2575	for (v = bl->giv; v; v = v->next_iv)
2576	{
2577	rtx giv_inc, value;
2578
2579	/* Only split the giv if it has already been reduced, or if the loop is
2580	being completely unrolled. */
2581	if (unroll_type != UNROLL_COMPLETELY && v->ignore)
2582	continue;
2583
2584	/* The giv can be split if the insn that sets the giv is executed once
2585	and only once on every iteration of the loop. */
2586	/* An address giv can always be split. v->insn is just a use not a set,
2587	and hence it does not matter whether it is always executed. All that
2588	matters is that all the biv increments are always executed, and we
2589	won't reach here if they aren't. */
2590	if (v->giv_type != DEST_ADDR
2591	&& (! v->always_computable
2592	\|\| back_branch_in_range_p (v->insn, loop_start, loop_end)))
2593	continue;
2594
2595	/* The giv increment value must be a constant. */
2596	giv_inc = fold_rtx_mult_add (v->mult_val, increment, const0_rtx,
2597	v->mode);
2598	if (! giv_inc \|\| GET_CODE (giv_inc) != CONST_INT)
2599	continue;
2600
2601	/* The loop must be unrolled completely, or else have a known number of
2602	iterations and only one exit, or else the giv must be dead outside
2603	the loop, or else the final value of the giv must be known.
2604	Otherwise, it is not safe to split the giv since it may not have the
2605	proper value on loop exit. */
2606
2607	/* The used outside loop test will fail for DEST_ADDR givs. They are
2608	never used outside the loop anyways, so it is always safe to split a
2609	DEST_ADDR giv. */
2610
2611	final_value = 0;
2612	if (unroll_type != UNROLL_COMPLETELY
2613	&& (loop_number_exit_count[uid_loop_num[INSN_UID (loop_start)]]
2614	\|\| unroll_type == UNROLL_NAIVE)
2615	&& v->giv_type != DEST_ADDR
2616	&& ((regno_first_uid[REGNO (v->dest_reg)] != INSN_UID (v->insn)
2617	/* Check for the case where the pseudo is set by a shift/add
2618	sequence, in which case the first insn setting the pseudo
2619	is the first insn of the shift/add sequence. */
2620	&& (! (tem = find_reg_note (v->insn, REG_RETVAL, NULL_RTX))
2621	\|\| (regno_first_uid[REGNO (v->dest_reg)]
2622	!= INSN_UID (XEXP (tem, 0)))))
2623	/* Line above always fails if INSN was moved by loop opt. */
2624	\|\| (uid_luid[regno_last_uid[REGNO (v->dest_reg)]]
2625	>= INSN_LUID (loop_end)))
2626	&& ! (final_value = v->final_value))
2627	continue;
2628
2629	#if 0
2630	/* Currently, non-reduced/final-value givs are never split. */
2631	/* Should emit insns after the loop if possible, as the biv final value
2632	code below does. */
2633
2634	/* If the final value is non-zero, and the giv has not been reduced,
2635	then must emit an instruction to set the final value. */
2636	if (final_value && !v->new_reg)
2637	{
2638	/* Create a new register to hold the value of the giv, and then set
2639	the giv to its final value before the loop start. The giv is set
2640	to its final value before loop start to ensure that this insn
2641	will always be executed, no matter how we exit. */
2642	tem = gen_reg_rtx (v->mode);
2643	emit_insn_before (gen_move_insn (tem, v->dest_reg), loop_start);
2644	emit_insn_before (gen_move_insn (v->dest_reg, final_value),
2645	loop_start);
2646
2647	if (loop_dump_stream)
2648	fprintf (loop_dump_stream, "Giv %d mapped to %d for split.\n",
2649	REGNO (v->dest_reg), REGNO (tem));
2650
2651	v->src_reg = tem;
2652	}
2653	#endif
2654
2655	/* This giv is splittable. If completely unrolling the loop, save the
2656	giv's initial value. Otherwise, save the constant zero for it. */
2657
2658	if (unroll_type == UNROLL_COMPLETELY)
2659	{
2660	/* It is not safe to use bl->initial_value here, because it may not
2661	be invariant. It is safe to use the initial value stored in
2662	the splittable_regs array if it is set. In rare cases, it won't
2663	be set, so then we do exactly the same thing as
2664	find_splittable_regs does to get a safe value. */
2665	rtx biv_initial_value;
2666
2667	if (splittable_regs[bl->regno])
2668	biv_initial_value = splittable_regs[bl->regno];
2669	else if (GET_CODE (bl->initial_value) != REG
2670	\|\| (REGNO (bl->initial_value) != bl->regno
2671	&& REGNO (bl->initial_value) >= FIRST_PSEUDO_REGISTER))
2672	biv_initial_value = bl->initial_value;
2673	else
2674	{
2675	rtx tem = gen_reg_rtx (bl->biv->mode);
2676
2677	emit_insn_before (gen_move_insn (tem, bl->biv->src_reg),
2678	loop_start);
2679	biv_initial_value = tem;
2680	}
2681	value = fold_rtx_mult_add (v->mult_val, biv_initial_value,
2682	v->add_val, v->mode);
2683	}
2684	else
2685	value = const0_rtx;
2686
2687	if (v->new_reg)
2688	{
2689	/* If a giv was combined with another giv, then we can only split
2690	this giv if the giv it was combined with was reduced. This
2691	is because the value of v->new_reg is meaningless in this
2692	case. */
2693	if (v->same && ! v->same->new_reg)
2694	{
2695	if (loop_dump_stream)
2696	fprintf (loop_dump_stream,
2697	"giv combined with unreduced giv not split.\n");
2698	continue;
2699	}
2700	/* If the giv is an address destination, it could be something other
2701	than a simple register, these have to be treated differently. */
2702	else if (v->giv_type == DEST_REG)
2703	{
2704	/* If value is not a constant, register, or register plus
2705	constant, then compute its value into a register before
2706	loop start. This prevents invalid rtx sharing, and should
2707	generate better code. We can use bl->initial_value here
2708	instead of splittable_regs[bl->regno] because this code
2709	is going before the loop start. */
2710	if (unroll_type == UNROLL_COMPLETELY
2711	&& GET_CODE (value) != CONST_INT
2712	&& GET_CODE (value) != REG
2713	&& (GET_CODE (value) != PLUS
2714	\|\| GET_CODE (XEXP (value, 0)) != REG
2715	\|\| GET_CODE (XEXP (value, 1)) != CONST_INT))
2716	{
2717	rtx tem = gen_reg_rtx (v->mode);
2718	emit_iv_add_mult (bl->initial_value, v->mult_val,
2719	v->add_val, tem, loop_start);
2720	value = tem;
2721	}
2722
2723	splittable_regs[REGNO (v->new_reg)] = value;
2724	}
2725	else
2726	{
2727	/* Splitting address givs is useful since it will often allow us
2728	to eliminate some increment insns for the base giv as
2729	unnecessary. */
2730
2731	/* If the addr giv is combined with a dest_reg giv, then all
2732	references to that dest reg will be remapped, which is NOT
2733	what we want for split addr regs. We always create a new
2734	register for the split addr giv, just to be safe. */
2735
2736	/* ??? If there are multiple address givs which have been
2737	combined with the same dest_reg giv, then we may only need
2738	one new register for them. Pulling out constants below will
2739	catch some of the common cases of this. Currently, I leave
2740	the work of simplifying multiple address givs to the
2741	following cse pass. */
2742
2743	/* As a special case, if we have multiple identical address givs
2744	within a single instruction, then we do use a single pseudo
2745	reg for both. This is necessary in case one is a match_dup
2746	of the other. */
2747
2748	v->const_adjust = 0;
2749
2750	if (v->same_insn)
2751	{
2752	v->dest_reg = v->same_insn->dest_reg;
2753	if (loop_dump_stream)
2754	fprintf (loop_dump_stream,
2755	"Sharing address givs in insn %d\n",
2756	INSN_UID (v->insn));
2757	}
2758	else if (unroll_type != UNROLL_COMPLETELY)
2759	{
2760	/* If not completely unrolling the loop, then create a new
2761	register to hold the split value of the DEST_ADDR giv.
2762	Emit insn to initialize its value before loop start. */
2763	tem = gen_reg_rtx (v->mode);
2764
2765	/* If the address giv has a constant in its new_reg value,
2766	then this constant can be pulled out and put in value,
2767	instead of being part of the initialization code. */
2768
2769	if (GET_CODE (v->new_reg) == PLUS
2770	&& GET_CODE (XEXP (v->new_reg, 1)) == CONST_INT)
2771	{
2772	v->dest_reg
2773	= plus_constant (tem, INTVAL (XEXP (v->new_reg,1)));
2774
2775	/* Only succeed if this will give valid addresses.
2776	Try to validate both the first and the last
2777	address resulting from loop unrolling, if
2778	one fails, then can't do const elim here. */
2779	if (! verify_addresses (v, giv_inc, unroll_number))
2780	{
2781	/* Save the negative of the eliminated const, so
2782	that we can calculate the dest_reg's increment
2783	value later. */
2784	v->const_adjust = - INTVAL (XEXP (v->new_reg, 1));
2785
2786	v->new_reg = XEXP (v->new_reg, 0);
2787	if (loop_dump_stream)
2788	fprintf (loop_dump_stream,
2789	"Eliminating constant from giv %d\n",
2790	REGNO (tem));
2791	}
2792	else
2793	v->dest_reg = tem;
2794	}
2795	else
2796	v->dest_reg = tem;
2797
2798	/* If the address hasn't been checked for validity yet, do so
2799	now, and fail completely if either the first or the last
2800	unrolled copy of the address is not a valid address
2801	for the instruction that uses it. */
2802	if (v->dest_reg == tem
2803	&& ! verify_addresses (v, giv_inc, unroll_number))
2804	{
2805	if (loop_dump_stream)
2806	fprintf (loop_dump_stream,
2807	"Invalid address for giv at insn %d\n",
2808	INSN_UID (v->insn));
2809	continue;
2810	}
2811
2812	/* To initialize the new register, just move the value of
2813	new_reg into it. This is not guaranteed to give a valid
2814	instruction on machines with complex addressing modes.
2815	If we can't recognize it, then delete it and emit insns
2816	to calculate the value from scratch. */
2817	emit_insn_before (gen_rtx (SET, VOIDmode, tem,
2818	copy_rtx (v->new_reg)),
2819	loop_start);
2820	if (recog_memoized (PREV_INSN (loop_start)) < 0)
2821	{
2822	rtx sequence, ret;
2823
2824	/* We can't use bl->initial_value to compute the initial
2825	value, because the loop may have been preconditioned.
2826	We must calculate it from NEW_REG. Try using
2827	force_operand instead of emit_iv_add_mult. */
2828	delete_insn (PREV_INSN (loop_start));
2829
2830	start_sequence ();
2831	ret = force_operand (v->new_reg, tem);
2832	if (ret != tem)
2833	emit_move_insn (tem, ret);
2834	sequence = gen_sequence ();
2835	end_sequence ();
2836	emit_insn_before (sequence, loop_start);
2837
2838	if (loop_dump_stream)
2839	fprintf (loop_dump_stream,
2840	"Invalid init insn, rewritten.\n");
2841	}
2842	}
2843	else
2844	{
2845	v->dest_reg = value;
2846
2847	/* Check the resulting address for validity, and fail
2848	if the resulting address would be invalid. */
2849	if (! verify_addresses (v, giv_inc, unroll_number))
2850	{
2851	if (loop_dump_stream)
2852	fprintf (loop_dump_stream,
2853	"Invalid address for giv at insn %d\n",
2854	INSN_UID (v->insn));
2855	continue;
2856	}
2857	}
2858
2859	/* Store the value of dest_reg into the insn. This sharing
2860	will not be a problem as this insn will always be copied
2861	later. */
2862
2863	*v->location = v->dest_reg;
2864
2865	/* If this address giv is combined with a dest reg giv, then
2866	save the base giv's induction pointer so that we will be
2867	able to handle this address giv properly. The base giv
2868	itself does not have to be splittable. */
2869
2870	if (v->same && v->same->giv_type == DEST_REG)
2871	addr_combined_regs[REGNO (v->same->new_reg)] = v->same;
2872
2873	if (GET_CODE (v->new_reg) == REG)
2874	{
2875	/* This giv maybe hasn't been combined with any others.
2876	Make sure that it's giv is marked as splittable here. */
2877
2878	splittable_regs[REGNO (v->new_reg)] = value;
2879
2880	/* Make it appear to depend upon itself, so that the
2881	giv will be properly split in the main loop above. */
2882	if (! v->same)
2883	{
2884	v->same = v;
2885	addr_combined_regs[REGNO (v->new_reg)] = v;
2886	}
2887	}
2888
2889	if (loop_dump_stream)
2890	fprintf (loop_dump_stream, "DEST_ADDR giv being split.\n");
2891	}
2892	}
2893	else
2894	{
2895	#if 0
2896	/* Currently, unreduced giv's can't be split. This is not too much
2897	of a problem since unreduced giv's are not live across loop
2898	iterations anyways. When unrolling a loop completely though,
2899	it makes sense to reduce&split givs when possible, as this will
2900	result in simpler instructions, and will not require that a reg
2901	be live across loop iterations. */
2902
2903	splittable_regs[REGNO (v->dest_reg)] = value;
2904	fprintf (stderr, "Giv %d at insn %d not reduced\n",
2905	REGNO (v->dest_reg), INSN_UID (v->insn));
2906	#else
2907	continue;
2908	#endif
2909	}
2910
2911	/* Givs are only updated once by definition. Mark it so if this is
2912	a splittable register. Don't need to do anything for address givs
2913	where this may not be a register. */
2914
2915	if (GET_CODE (v->new_reg) == REG)
2916	splittable_regs_updates[REGNO (v->new_reg)] = 1;
2917
2918	result++;
2919
2920	if (loop_dump_stream)
2921	{
2922	int regnum;
2923
2924	if (GET_CODE (v->dest_reg) == CONST_INT)
2925	regnum = -1;
2926	else if (GET_CODE (v->dest_reg) != REG)
2927	regnum = REGNO (XEXP (v->dest_reg, 0));
2928	else
2929	regnum = REGNO (v->dest_reg);
2930	fprintf (loop_dump_stream, "Giv %d at insn %d safe to split.\n",
2931	regnum, INSN_UID (v->insn));
2932	}
2933	}
2934
2935	return result;
2936	}
2937
2938	/* Try to prove that the register is dead after the loop exits. Trace every
2939	loop exit looking for an insn that will always be executed, which sets
2940	the register to some value, and appears before the first use of the register
2941	is found. If successful, then return 1, otherwise return 0. */
2942
2943	/* ?? Could be made more intelligent in the handling of jumps, so that
2944	it can search past if statements and other similar structures. */
2945
2946	static int
2947	reg_dead_after_loop (reg, loop_start, loop_end)
2948	rtx reg, loop_start, loop_end;
2949	{
2950	rtx insn, label;
2951	enum rtx_code code;
2952	int jump_count = 0;
2953	int label_count = 0;
2954	int this_loop_num = uid_loop_num[INSN_UID (loop_start)];
2955
2956	/* In addition to checking all exits of this loop, we must also check
2957	all exits of inner nested loops that would exit this loop. We don't
2958	have any way to identify those, so we just give up if there are any
2959	such inner loop exits. */
2960
2961	for (label = loop_number_exit_labels[this_loop_num]; label;
2962	label = LABEL_NEXTREF (label))
2963	label_count++;
2964
2965	if (label_count != loop_number_exit_count[this_loop_num])
2966	return 0;
2967
2968	/* HACK: Must also search the loop fall through exit, create a label_ref
2969	here which points to the loop_end, and append the loop_number_exit_labels
2970	list to it. */
2971	label = gen_rtx (LABEL_REF, VOIDmode, loop_end);
2972	LABEL_NEXTREF (label) = loop_number_exit_labels[this_loop_num];
2973
2974	for ( ; label; label = LABEL_NEXTREF (label))
2975	{
2976	/* Succeed if find an insn which sets the biv or if reach end of
2977	function. Fail if find an insn that uses the biv, or if come to
2978	a conditional jump. */
2979
2980	insn = NEXT_INSN (XEXP (label, 0));
2981	while (insn)
2982	{
2983	code = GET_CODE (insn);
2984	if (GET_RTX_CLASS (code) == 'i')
2985	{
2986	rtx set;
2987
2988	if (reg_referenced_p (reg, PATTERN (insn)))
2989	return 0;
2990
2991	set = single_set (insn);
2992	if (set && rtx_equal_p (SET_DEST (set), reg))
2993	break;
2994	}
2995
2996	if (code == JUMP_INSN)
2997	{
2998	if (GET_CODE (PATTERN (insn)) == RETURN)
2999	break;
3000	else if (! simplejump_p (insn)
3001	/* Prevent infinite loop following infinite loops. */
3002	\|\| jump_count++ > 20)
3003	return 0;
3004	else
3005	insn = JUMP_LABEL (insn);
3006	}
3007
3008	insn = NEXT_INSN (insn);
3009	}
3010	}
3011
3012	/* Success, the register is dead on all loop exits. */
3013	return 1;
3014	}
3015
3016	/* Try to calculate the final value of the biv, the value it will have at
3017	the end of the loop. If we can do it, return that value. */
3018
3019	rtx
3020	final_biv_value (bl, loop_start, loop_end)
3021	struct iv_class *bl;
3022	rtx loop_start, loop_end;
3023	{
3024	rtx increment, tem;
3025
3026	/* ??? This only works for MODE_INT biv's. Reject all others for now. */
3027
3028	if (GET_MODE_CLASS (bl->biv->mode) != MODE_INT)
3029	return 0;
3030
3031	/* The final value for reversed bivs must be calculated differently than
3032	for ordinary bivs. In this case, there is already an insn after the
3033	loop which sets this biv's final value (if necessary), and there are
3034	no other loop exits, so we can return any value. */
3035	if (bl->reversed)
3036	{
3037	if (loop_dump_stream)
3038	fprintf (loop_dump_stream,
3039	"Final biv value for %d, reversed biv.\n", bl->regno);
3040
3041	return const0_rtx;
3042	}
3043
3044	/* Try to calculate the final value as initial value + (number of iterations
3045	* increment). For this to work, increment must be invariant, the only
3046	exit from the loop must be the fall through at the bottom (otherwise
3047	it may not have its final value when the loop exits), and the initial
3048	value of the biv must be invariant. */
3049
3050	if (loop_n_iterations != 0
3051	&& ! loop_number_exit_count[uid_loop_num[INSN_UID (loop_start)]]
3052	&& invariant_p (bl->initial_value))
3053	{
3054	increment = biv_total_increment (bl, loop_start, loop_end);
3055
3056	if (increment && invariant_p (increment))
3057	{
3058	/* Can calculate the loop exit value, emit insns after loop
3059	end to calculate this value into a temporary register in
3060	case it is needed later. */
3061
3062	tem = gen_reg_rtx (bl->biv->mode);
3063	/* Make sure loop_end is not the last insn. */
3064	if (NEXT_INSN (loop_end) == 0)
3065	emit_note_after (NOTE_INSN_DELETED, loop_end);
3066	emit_iv_add_mult (increment, GEN_INT (loop_n_iterations),
3067	bl->initial_value, tem, NEXT_INSN (loop_end));
3068
3069	if (loop_dump_stream)
3070	fprintf (loop_dump_stream,
3071	"Final biv value for %d, calculated.\n", bl->regno);
3072
3073	return tem;
3074	}
3075	}
3076
3077	/* Check to see if the biv is dead at all loop exits. */
3078	if (reg_dead_after_loop (bl->biv->src_reg, loop_start, loop_end))
3079	{
3080	if (loop_dump_stream)
3081	fprintf (loop_dump_stream,
3082	"Final biv value for %d, biv dead after loop exit.\n",
3083	bl->regno);
3084
3085	return const0_rtx;
3086	}
3087
3088	return 0;
3089	}
3090
3091	/* Try to calculate the final value of the giv, the value it will have at
3092	the end of the loop. If we can do it, return that value. */
3093
3094	rtx
3095	final_giv_value (v, loop_start, loop_end)
3096	struct induction *v;
3097	rtx loop_start, loop_end;
3098	{
3099	struct iv_class *bl;
3100	rtx insn;
3101	rtx increment, tem;
3102	rtx insert_before, seq;
3103
3104	bl = reg_biv_class[REGNO (v->src_reg)];
3105
3106	/* The final value for givs which depend on reversed bivs must be calculated
3107	differently than for ordinary givs. In this case, there is already an
3108	insn after the loop which sets this giv's final value (if necessary),
3109	and there are no other loop exits, so we can return any value. */
3110	if (bl->reversed)
3111	{
3112	if (loop_dump_stream)
3113	fprintf (loop_dump_stream,
3114	"Final giv value for %d, depends on reversed biv\n",
3115	REGNO (v->dest_reg));
3116	return const0_rtx;
3117	}
3118
3119	/* Try to calculate the final value as a function of the biv it depends
3120	upon. The only exit from the loop must be the fall through at the bottom
3121	(otherwise it may not have its final value when the loop exits). */
3122
3123	/* ??? Can calculate the final giv value by subtracting off the
3124	extra biv increments times the giv's mult_val. The loop must have
3125	only one exit for this to work, but the loop iterations does not need
3126	to be known. */
3127
3128	if (loop_n_iterations != 0
3129	&& ! loop_number_exit_count[uid_loop_num[INSN_UID (loop_start)]])
3130	{
3131	/* ?? It is tempting to use the biv's value here since these insns will
3132	be put after the loop, and hence the biv will have its final value
3133	then. However, this fails if the biv is subsequently eliminated.
3134	Perhaps determine whether biv's are eliminable before trying to
3135	determine whether giv's are replaceable so that we can use the
3136	biv value here if it is not eliminable. */
3137
3138	increment = biv_total_increment (bl, loop_start, loop_end);
3139
3140	if (increment && invariant_p (increment))
3141	{
3142	/* Can calculate the loop exit value of its biv as
3143	(loop_n_iterations * increment) + initial_value */
3144
3145	/* The loop exit value of the giv is then
3146	(final_biv_value - extra increments) * mult_val + add_val.
3147	The extra increments are any increments to the biv which
3148	occur in the loop after the giv's value is calculated.
3149	We must search from the insn that sets the giv to the end
3150	of the loop to calculate this value. */
3151
3152	insert_before = NEXT_INSN (loop_end);
3153
3154	/* Put the final biv value in tem. */
3155	tem = gen_reg_rtx (bl->biv->mode);
3156	emit_iv_add_mult (increment, GEN_INT (loop_n_iterations),
3157	bl->initial_value, tem, insert_before);
3158
3159	/* Subtract off extra increments as we find them. */
3160	for (insn = NEXT_INSN (v->insn); insn != loop_end;
3161	insn = NEXT_INSN (insn))
3162	{
3163	struct induction *biv;
3164
3165	for (biv = bl->biv; biv; biv = biv->next_iv)
3166	if (biv->insn == insn)
3167	{
3168	start_sequence ();
3169	tem = expand_binop (GET_MODE (tem), sub_optab, tem,
3170	biv->add_val, NULL_RTX, 0,
3171	OPTAB_LIB_WIDEN);
3172	seq = gen_sequence ();
3173	end_sequence ();
3174	emit_insn_before (seq, insert_before);
3175	}
3176	}
3177
3178	/* Now calculate the giv's final value. */
3179	emit_iv_add_mult (tem, v->mult_val, v->add_val, tem,
3180	insert_before);
3181
3182	if (loop_dump_stream)
3183	fprintf (loop_dump_stream,
3184	"Final giv value for %d, calc from biv's value.\n",
3185	REGNO (v->dest_reg));
3186
3187	return tem;
3188	}
3189	}
3190
3191	/* Replaceable giv's should never reach here. */
3192	if (v->replaceable)
3193	abort ();
3194
3195	/* Check to see if the biv is dead at all loop exits. */
3196	if (reg_dead_after_loop (v->dest_reg, loop_start, loop_end))
3197	{
3198	if (loop_dump_stream)
3199	fprintf (loop_dump_stream,
3200	"Final giv value for %d, giv dead after loop exit.\n",
3201	REGNO (v->dest_reg));
3202
3203	return const0_rtx;
3204	}
3205
3206	return 0;
3207	}
3208
3209
3210	/* Calculate the number of loop iterations. Returns the exact number of loop
3211	iterations if it can be calculated, otherwise returns zero. */
3212
3213	unsigned HOST_WIDE_INT
3214	loop_iterations (loop_start, loop_end)
3215	rtx loop_start, loop_end;
3216	{
3217	rtx comparison, comparison_value;
3218	rtx iteration_var, initial_value, increment, final_value;
3219	enum rtx_code comparison_code;
3220	HOST_WIDE_INT i;
3221	int increment_dir;
3222	int unsigned_compare, compare_dir, final_larger;
3223	unsigned long tempu;
3224	rtx last_loop_insn;
3225
3226	/* First find the iteration variable. If the last insn is a conditional
3227	branch, and the insn before tests a register value, make that the
3228	iteration variable. */
3229
3230	loop_initial_value = 0;
3231	loop_increment = 0;
3232	loop_final_value = 0;
3233	loop_iteration_var = 0;
3234
3235	/* We used to use pren_nonnote_insn here, but that fails because it might
3236	accidentally get the branch for a contained loop if the branch for this
3237	loop was deleted. We can only trust branches immediately before the
3238	loop_end. */
3239	last_loop_insn = PREV_INSN (loop_end);
3240
3241	comparison = get_condition_for_loop (last_loop_insn);
3242	if (comparison == 0)
3243	{
3244	if (loop_dump_stream)
3245	fprintf (loop_dump_stream,
3246	"Loop unrolling: No final conditional branch found.\n");
3247	return 0;
3248	}
3249
3250	/* ??? Get_condition may switch position of induction variable and
3251	invariant register when it canonicalizes the comparison. */
3252
3253	comparison_code = GET_CODE (comparison);
3254	iteration_var = XEXP (comparison, 0);
3255	comparison_value = XEXP (comparison, 1);
3256
3257	if (GET_CODE (iteration_var) != REG)
3258	{
3259	if (loop_dump_stream)
3260	fprintf (loop_dump_stream,
3261	"Loop unrolling: Comparison not against register.\n");
3262	return 0;
3263	}
3264
3265	/* Loop iterations is always called before any new registers are created
3266	now, so this should never occur. */
3267
3268	if (REGNO (iteration_var) >= max_reg_before_loop)
3269	abort ();
3270
3271	iteration_info (iteration_var, &initial_value, &increment,
3272	loop_start, loop_end);
3273	if (initial_value == 0)
3274	/* iteration_info already printed a message. */
3275	return 0;
3276
3277	/* If the comparison value is an invariant register, then try to find
3278	its value from the insns before the start of the loop. */
3279
3280	if (GET_CODE (comparison_value) == REG && invariant_p (comparison_value))
3281	{
3282	rtx insn, set;
3283
3284	for (insn = PREV_INSN (loop_start); insn ; insn = PREV_INSN (insn))
3285	{
3286	if (GET_CODE (insn) == CODE_LABEL)
3287	break;
3288
3289	else if (GET_RTX_CLASS (GET_CODE (insn)) == 'i'
3290	&& reg_set_p (comparison_value, insn))
3291	{
3292	/* We found the last insn before the loop that sets the register.
3293	If it sets the entire register, and has a REG_EQUAL note,
3294	then use the value of the REG_EQUAL note. */
3295	if ((set = single_set (insn))
3296	&& (SET_DEST (set) == comparison_value))
3297	{
3298	rtx note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
3299
3300	/* Only use the REG_EQUAL note if it is a constant.
3301	Other things, divide in particular, will cause
3302	problems later if we use them. */
3303	if (note && GET_CODE (XEXP (note, 0)) != EXPR_LIST
3304	&& CONSTANT_P (XEXP (note, 0)))
3305	comparison_value = XEXP (note, 0);
3306	}
3307	break;
3308	}
3309	}
3310	}
3311
3312	final_value = approx_final_value (comparison_code, comparison_value,
3313	&unsigned_compare, &compare_dir);
3314
3315	/* Save the calculated values describing this loop's bounds, in case
3316	precondition_loop_p will need them later. These values can not be
3317	recalculated inside precondition_loop_p because strength reduction
3318	optimizations may obscure the loop's structure. */
3319
3320	loop_iteration_var = iteration_var;
3321	loop_initial_value = initial_value;
3322	loop_increment = increment;
3323	loop_final_value = final_value;
3324
3325	if (increment == 0)
3326	{
3327	if (loop_dump_stream)
3328	fprintf (loop_dump_stream,
3329	"Loop unrolling: Increment value can't be calculated.\n");
3330	return 0;
3331	}
3332	else if (GET_CODE (increment) != CONST_INT)
3333	{
3334	if (loop_dump_stream)
3335	fprintf (loop_dump_stream,
3336	"Loop unrolling: Increment value not constant.\n");
3337	return 0;
3338	}
3339	else if (GET_CODE (initial_value) != CONST_INT)
3340	{
3341	if (loop_dump_stream)
3342	fprintf (loop_dump_stream,
3343	"Loop unrolling: Initial value not constant.\n");
3344	return 0;
3345	}
3346	else if (final_value == 0)
3347	{
3348	if (loop_dump_stream)
3349	fprintf (loop_dump_stream,
3350	"Loop unrolling: EQ comparison loop.\n");
3351	return 0;
3352	}
3353	else if (GET_CODE (final_value) != CONST_INT)
3354	{
3355	if (loop_dump_stream)
3356	fprintf (loop_dump_stream,
3357	"Loop unrolling: Final value not constant.\n");
3358	return 0;
3359	}
3360
3361	/* ?? Final value and initial value do not have to be constants.
3362	Only their difference has to be constant. When the iteration variable
3363	is an array address, the final value and initial value might both
3364	be addresses with the same base but different constant offsets.
3365	Final value must be invariant for this to work.
3366
3367	To do this, need some way to find the values of registers which are
3368	invariant. */
3369
3370	/* Final_larger is 1 if final larger, 0 if they are equal, otherwise -1. */
3371	if (unsigned_compare)
3372	final_larger
3373	= ((unsigned HOST_WIDE_INT) INTVAL (final_value)
3374	> (unsigned HOST_WIDE_INT) INTVAL (initial_value))
3375	- ((unsigned HOST_WIDE_INT) INTVAL (final_value)
3376	< (unsigned HOST_WIDE_INT) INTVAL (initial_value));
3377	else
3378	final_larger = (INTVAL (final_value) > INTVAL (initial_value))
3379	- (INTVAL (final_value) < INTVAL (initial_value));
3380
3381	if (INTVAL (increment) > 0)
3382	increment_dir = 1;
3383	else if (INTVAL (increment) == 0)
3384	increment_dir = 0;
3385	else
3386	increment_dir = -1;
3387
3388	/* There are 27 different cases: compare_dir = -1, 0, 1;
3389	final_larger = -1, 0, 1; increment_dir = -1, 0, 1.
3390	There are 4 normal cases, 4 reverse cases (where the iteration variable
3391	will overflow before the loop exits), 4 infinite loop cases, and 15
3392	immediate exit (0 or 1 iteration depending on loop type) cases.
3393	Only try to optimize the normal cases. */
3394
3395	/* (compare_dir/final_larger/increment_dir)
3396	Normal cases: (0/-1/-1), (0/1/1), (-1/-1/-1), (1/1/1)
3397	Reverse cases: (0/-1/1), (0/1/-1), (-1/-1/1), (1/1/-1)
3398	Infinite loops: (0/-1/0), (0/1/0), (-1/-1/0), (1/1/0)
3399	Immediate exit: (0/0/X), (-1/0/X), (-1/1/X), (1/0/X), (1/-1/X) */
3400
3401	/* ?? If the meaning of reverse loops (where the iteration variable
3402	will overflow before the loop exits) is undefined, then could
3403	eliminate all of these special checks, and just always assume
3404	the loops are normal/immediate/infinite. Note that this means
3405	the sign of increment_dir does not have to be known. Also,
3406	since it does not really hurt if immediate exit loops or infinite loops
3407	are optimized, then that case could be ignored also, and hence all
3408	loops can be optimized.
3409
3410	According to ANSI Spec, the reverse loop case result is undefined,
3411	because the action on overflow is undefined.
3412
3413	See also the special test for NE loops below. */
3414
3415	if (final_larger == increment_dir && final_larger != 0
3416	&& (final_larger == compare_dir \|\| compare_dir == 0))
3417	/* Normal case. */
3418	;
3419	else
3420	{
3421	if (loop_dump_stream)
3422	fprintf (loop_dump_stream,
3423	"Loop unrolling: Not normal loop.\n");
3424	return 0;
3425	}
3426
3427	/* Calculate the number of iterations, final_value is only an approximation,
3428	so correct for that. Note that tempu and loop_n_iterations are
3429	unsigned, because they can be as large as 2^n - 1. */
3430
3431	i = INTVAL (increment);
3432	if (i > 0)
3433	tempu = INTVAL (final_value) - INTVAL (initial_value);
3434	else if (i < 0)
3435	{
3436	tempu = INTVAL (initial_value) - INTVAL (final_value);
3437	i = -i;
3438	}
3439	else
3440	abort ();
3441
3442	/* For NE tests, make sure that the iteration variable won't miss the
3443	final value. If tempu mod i is not zero, then the iteration variable
3444	will overflow before the loop exits, and we can not calculate the
3445	number of iterations. */
3446	if (compare_dir == 0 && (tempu % i) != 0)
3447	return 0;
3448
3449	return tempu / i + ((tempu % i) != 0);
3450	}
3451
3452	/* Replace uses of split bivs with their split pseudo register. This is
3453	for original instructions which remain after loop unrolling without
3454	copying. */
3455
3456	static rtx
3457	remap_split_bivs (x)
3458	rtx x;
3459	{
3460	register enum rtx_code code;
3461	register int i;
3462	register char *fmt;
3463
3464	if (x == 0)
3465	return x;
3466
3467	code = GET_CODE (x);
3468	switch (code)
3469	{
3470	case SCRATCH:
3471	case PC:
3472	case CC0:
3473	case CONST_INT:
3474	case CONST_DOUBLE:
3475	case CONST:
3476	case SYMBOL_REF:
3477	case LABEL_REF:
3478	return x;
3479
3480	case REG:
3481	#if 0
3482	/* If non-reduced/final-value givs were split, then this would also
3483	have to remap those givs also. */
3484	#endif
3485	if (REGNO (x) < max_reg_before_loop
3486	&& reg_iv_type[REGNO (x)] == BASIC_INDUCT)
3487	return reg_biv_class[REGNO (x)]->biv->src_reg;
3488	}
3489
3490	fmt = GET_RTX_FORMAT (code);
3491	for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3492	{
3493	if (fmt[i] == 'e')
3494	XEXP (x, i) = remap_split_bivs (XEXP (x, i));
3495	if (fmt[i] == 'E')
3496	{
3497	register int j;
3498	for (j = 0; j < XVECLEN (x, i); j++)
3499	XVECEXP (x, i, j) = remap_split_bivs (XVECEXP (x, i, j));
3500	}
3501	}
3502	return x;
3503	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: